示例#1
0
    def __init__(self, output_path, train_dataset: tf.data.Dataset,
                 valid_dataset: tf.data.Dataset):
        """Create the summary writer and the train/validation input tensors.

        Args:
            output_path: Location handed to
                ``tf.contrib.summary.create_file_writer``.
            train_dataset: Dataset whose elements unpack into an ``(x, y)``
                pair; exposed as ``self.train_x`` / ``self.train_y``.
            valid_dataset: Dataset whose elements unpack into an ``(x, y)``
                pair; exposed as ``self.valid_x`` / ``self.valid_y``.
        """
        self.writer = tf.contrib.summary.create_file_writer(output_path)
        # Starts at zero; presumably advanced by the training loop, which is
        # not part of this method.
        self.global_batch = 0

        # One-shot iterators: each dataset can be walked through exactly once.
        train_iterator = train_dataset.make_one_shot_iterator()
        train_element = train_iterator.get_next()
        self.train_x, self.train_y = train_element

        valid_iterator = valid_dataset.make_one_shot_iterator()
        valid_element = valid_iterator.get_next()
        self.valid_x, self.valid_y = valid_element
示例#2
0
def compute_mean_std(data: tf.data.Dataset):
    """Compute the mean and standard deviation of the images in a dataset.

    The dataset is streamed once in batches of up to 1024 images. Every batch
    contributes its mean and its mean of squares, weighted by
    ``batch_size / 1024`` so that a short final batch is not over-represented.
    The standard deviation is then derived as ``sqrt(E[x^2] - E[x]^2)``.

    Args:
        data: Dataset of dicts carrying an ``'image'`` entry. The statistics
            are reduced over axes ``(0, 1, 2)`` of each batch, so batches are
            assumed to be rank 4 with channels last -- TODO confirm.

    Returns:
        Tuple ``(mean, std)`` holding one value per remaining (last) axis
        entry.

    Raises:
        ZeroDivisionError: If the dataset yields no batches at all.
    """
    batch_size = 1024
    data = data.map(lambda x: x['image']).batch(batch_size).prefetch(1)
    data = data.make_one_shot_iterator().get_next()
    count = 0
    stats = []
    with tf.Session(config=utils.get_config()) as sess:

        def iterator():
            # Yield evaluated batches until the one-shot iterator is drained.
            while True:
                try:
                    yield sess.run(data)
                except tf.errors.OutOfRangeError:
                    break

        for batch in tqdm(iterator(),
                          unit='kimg',
                          desc='Computing dataset mean and std'):
            # Fraction of a full batch; 1.0 except for a short last batch.
            ratio = batch.shape[0] / float(batch_size)
            count += ratio
            stats.append((batch.mean((0, 1, 2)) * ratio, (batch**2).mean(
                (0, 1, 2)) * ratio))
    mean = sum(x[0] for x in stats) / count
    sigma = sum(x[1] for x in stats) / count - mean**2
    # E[x^2] - E[x]^2 can come out as a tiny negative number through
    # floating-point cancellation (e.g. a constant channel); clamp it so the
    # square root yields 0 instead of NaN.
    std = np.sqrt(np.maximum(sigma, 0))
    print('Mean %s  Std: %s' % (mean, std))
    return mean, std
示例#3
0
def _get_data_results(data: tf.data.Dataset, session_manager,
                      max_iteration=None) -> dict:
    """Evaluate a dataset and gather every sample into nested lists.

    Each evaluated sample is flattened with
    ``nest_utils.flatten_nested_struct``; the values are appended to one list
    per flat key (``bytes`` values are decoded to ``str`` first). The
    collected lists are finally restored to a nested structure with
    ``nest_utils.unflatten_dict_to_nested``.

    Args:
        data: Dataset to read from.
        session_manager: Context manager that yields an object with a
            ``run`` method when entered.
        max_iteration: Optional cap on the number of samples to read. When
            ``None`` the dataset is read until it is exhausted.

    Returns:
        Nested structure whose leaves are lists of the collected values.
    """
    next_sample = data.make_one_shot_iterator().get_next()
    collected = {}
    completed = 0
    with session_manager as sess:
        while True:
            try:
                flat_sample = nest_utils.flatten_nested_struct(
                    sess.run(next_sample))
                for key, value in flat_sample.items():
                    bucket = collected.setdefault(key, [])
                    bucket.append(
                        value.decode() if isinstance(value, bytes) else value)
                completed += 1
            except tf.errors.OutOfRangeError:
                # Dataset exhausted before the cap (if any) was reached.
                break

            if max_iteration is not None and completed >= max_iteration:
                break
    return nest_utils.unflatten_dict_to_nested(collected)
示例#4
0
def memoize(dataset: tf.data.Dataset) -> tf.data.Dataset:
    """Read a dataset into memory once and serve it back from NumPy arrays.

    The input is drained inside a temporary graph and session. The ``'image'``
    and ``'label'`` entries of all records are stacked into two arrays, and
    the returned dataset looks records up by index through ``tf.py_func``.

    Args:
        dataset: Finite dataset of dicts with ``'image'`` and ``'label'``
            entries.

    Returns:
        An endlessly repeating, shuffled dataset of ``dict(image, label)``.
        The shuffle buffer is ``FLAGS.shuffle``, capped at the record count.
    """
    records = []
    with tf.Graph().as_default(), tf.Session(
            config=utils.get_config()) as session:
        next_record = dataset.prefetch(16).make_one_shot_iterator().get_next()
        while True:
            try:
                records.append(session.run(next_record))
            except tf.errors.OutOfRangeError:
                break
    images = np.stack([record['image'] for record in records])
    labels = np.stack([record['label'] for record in records])
    total = len(records)

    def lookup(index):
        # Bridge from a graph-side index to the in-memory arrays.
        def fetch(position):
            return images[position], labels[position]

        image, label = tf.py_func(fetch, [index], [tf.float32, tf.int64])
        return {'image': image, 'label': label}

    indices = tf.data.Dataset.range(total).repeat()
    indices = indices.shuffle(min(FLAGS.shuffle, total))
    return indices.map(lookup)
示例#5
0
def train_fn(ds: tf.data.Dataset,
             batch_size=1,
             shuffle=10000,
             repeat: int = None):
    '''Build an input function on top of a dataset.

    Args:
        ds: Source dataset.
        batch_size: Number of elements per batch.
        shuffle: Shuffle buffer size; a falsy value disables shuffling.
        repeat: Passed to ``Dataset.repeat`` unless it equals 1, in which
            case the dataset is not repeated.

    Returns:
        A zero-argument callable that creates a one-shot iterator over the
        prepared dataset and returns its ``get_next()`` result.
    '''
    pipeline = ds.shuffle(shuffle) if shuffle else ds
    pipeline = pipeline.batch(batch_size)
    if repeat != 1:
        pipeline = pipeline.repeat(repeat)

    def input_fn():
        return pipeline.make_one_shot_iterator().get_next()

    return input_fn
示例#6
0
def compare_datasets_graph_mode(original_dataset: tf.data.Dataset,
                                dataset_from_stream: tf.data.Dataset) -> int:
    """Walk two datasets in lockstep and assert their elements are equal.

    For every pair of elements the ``"label"`` entries are compared with
    ``==`` and the ``"image"`` entries with ``np.array_equal``. The walk ends
    as soon as either dataset is exhausted.

    Args:
        original_dataset: Reference dataset.
        dataset_from_stream: Dataset to check against the reference.

    Returns:
        Number of element pairs that were compared successfully.

    Raises:
        AssertionError: If a label or an image differs.
    """
    stream_next = dataset_from_stream.make_one_shot_iterator().get_next()
    original_next = original_dataset.make_one_shot_iterator().get_next()
    matched = 0

    with tf.Session() as sess:
        try:
            while True:
                from_stream = sess.run(stream_next)
                from_original = sess.run(original_next)
                assert from_stream["label"] == from_original["label"]
                assert np.array_equal(from_stream["image"],
                                      from_original["image"])
                matched += 1
        except tf.errors.OutOfRangeError:
            # One of the two datasets ran out of elements.
            pass

    return matched
示例#7
0
def create_dataset_iter(dataset: tf.data.Dataset):
    """Create the "next element" operation for a dataset.

    Parameters
    ----------
    dataset : tf.data.Dataset
        Dataset to iterate over.

    Returns
    -------
    The result of ``get_next()`` on a one-shot iterator over ``dataset``.
    """
    # Define an operation that fetches the next group of data.
    return dataset.make_one_shot_iterator().get_next()
示例#8
0
def print_dataset(dataset: tf.data.Dataset):
    """Print the first ten examples of a dataset and the total example count.

    Every record is parsed with ``tf.train.Example.FromString`` (also those
    that are not printed).

    Args:
        dataset: Dataset of serialized ``tf.train.Example`` records.
    """
    next_record = dataset.make_one_shot_iterator().get_next()
    seen = 0
    with tf.Session() as sess:
        try:
            while True:
                serialized = sess.run(next_record)
                example = tf.train.Example.FromString(serialized)
                if seen < 10:
                    print(example)
                seen += 1
        except tf.errors.OutOfRangeError:
            # Reached the end of the dataset.
            pass
    print("total examples: " + str(seen))
示例#9
0
def bias_ops(ds: tf.data.Dataset, V):
    """Build ops that accumulate token statistics over a dataset.

    Four zero-initialized local variables are created and an ``update`` op is
    returned which, each time it is run, folds the next element of ``ds`` into
    them.

    Args:
        ds: Dataset whose elements unpack into ``(features, labels)``, where
            ``features[TEXT]`` holds token ids and
            ``features[SENTENCE_LENGTH]`` the number of valid tokens per row.
        V: Length of the tally vector (presumably the vocabulary size --
            TODO confirm).

    Returns:
        Tuple ``(vocab_tally, sentence_count, word_count, max_length,
        update)``: the four accumulator variables followed by the grouped
        update op.
    """

    def zero_local(name, dtype, shape):
        # All accumulators are local variables that start at zero.
        return tf.get_local_variable(
            name=name,
            dtype=dtype,
            initializer=tf.initializers.zeros,
            shape=shape
        )

    features, _ = ds.make_one_shot_iterator().get_next()
    tokens = features[TEXT]  # (N, L)
    token_lengths = features[SENTENCE_LENGTH]  # (N,)

    vocab_tally = zero_local('vocab_tally', tf.int64, (V,))  # (V,)
    word_count = zero_local('word_count', token_lengths.dtype, [])
    max_length = zero_local('max_length', token_lengths.dtype, [])
    sentence_count = zero_local('sentence_count', tf.int32, [])

    # Drop the positions beyond each row's length.
    padded_width = tf.shape(tokens)[1]
    mask = tf.sequence_mask(lengths=token_lengths, maxlen=padded_width)  # (N, L)
    valid_tokens = tf.boolean_mask(tensor=tokens, mask=mask)  # (Z,)

    # Add one to the tally slot of every valid token.
    tally_indices = tf.expand_dims(valid_tokens, 1)
    tally_increments = tf.ones(
        shape=tf.shape(valid_tokens), dtype=vocab_tally.dtype)
    update_tally = tf.scatter_nd_add(
        ref=vocab_tally, indices=tally_indices, updates=tally_increments)

    rows_in_batch = tf.shape(tokens)[0]
    update_sentence_count = tf.assign_add(
        ref=sentence_count, value=rows_in_batch)

    words_in_batch = tf.reduce_sum(token_lengths)
    update_word_count = tf.assign_add(ref=word_count, value=words_in_batch)

    longest_in_batch = tf.reduce_max(token_lengths)
    update_max_length = tf.assign(
        ref=max_length, value=tf.maximum(max_length, longest_in_batch))

    update = tf.group(
        update_tally,
        update_sentence_count,
        update_word_count,
        update_max_length,
    )
    return vocab_tally, sentence_count, word_count, max_length, update
示例#10
0
def load_dataset(dataset: tf.data.Dataset) -> Dict[str, np.ndarray]:
    """Load a whole TensorFlow dataset into memory as numpy arrays.

    One ``streaming_concat`` metric is built per entry of the dataset element
    and the metrics are handed to ``evaluate_metrics`` together with the
    local-variables initializer.

    Args:
        dataset: Input dataset with some finite size whose elements are
            dicts.

    Returns:
        Dict of numpy arrays with concatenated data from the full input
        dataset.
    """
    element = dataset.make_one_shot_iterator().get_next()
    metrics = {}
    for key, tensor in element.items():
        metrics[key] = tf.contrib.metrics.streaming_concat(tensor)
    init_op = tf.local_variables_initializer()
    with tf.Session(config=_disable_rewrite_config()) as sess:
        result = evaluate_metrics(sess, init_op, metrics)
    return result
示例#11
0
文件: anim.py 项目: jharrang/fenwicks
def show_dataset(
    ds: tf.data.Dataset,
    n_batch: int = 1,
    n_img: int = 10,
    converter: Callable = vision.transform.reverse_imagenet_normalize_tf
) -> FuncAnimation:
    """Animate the first images of a dataset.

    Args:
        ds: Dataset whose elements unpack into two parts; only the first one
            (the images) is used.
        n_batch: Maximum number of batches to read.
        n_img: Maximum number of images to collect across those batches.
        converter: Callable applied to each batch of images before they are
            collected.

    Returns:
        The result of ``images_anim`` on the collected images, clipped to the
        range ``[0.0, 1.0]``.
    """
    frames = []
    data_op = ds.make_one_shot_iterator().get_next()
    remaining = n_img

    with tf.Session() as sess:
        for _ in range(n_batch):
            batch, _ignored = sess.run(data_op)
            if len(batch) < remaining:
                # Whole batch fits; keep reading.
                frames.extend(converter(batch))
                remaining -= len(batch)
                continue
            # This batch completes the quota; take what is needed and stop.
            frames.extend(converter(batch[:remaining]))
            break

    clipped = np.clip(np.array(frames), 0.0, 1.0)
    return images_anim(clipped)
示例#12
0
def iter_first_x(dataset: tf.data.Dataset, num_batches: int):
    """
    Generate up to ``num_batches`` evaluated batches from a dataset.

    Iteration ends early, with an info log message, if the dataset runs out
    of elements first.

    :param dataset: tf.data.Dataset object
    :param num_batches: maximum number of batches to yield
    :return: generator over the evaluated batches
    """
    # The iterator ops must live in the same graph as the dataset ops.
    # TODO: every call adds new iterator ops to that graph; a reinitializable
    #  iterator would be a better solution.
    # pylint: disable=protected-access
    graph = dataset._graph
    with graph.as_default():
        # get_next() only builds a tensor; the data is fetched by sess.run().
        next_element = dataset.make_one_shot_iterator().get_next()

    yielded = 0
    # The session is bound to the graph that holds dataset and iterator ops.
    with tf.compat.v1.Session(graph=graph) as sess:
        while yielded < num_batches:
            try:
                batch_data = sess.run(next_element)
            except tf.errors.OutOfRangeError:
                # Running out of data simply ends the iteration instead of
                # surfacing the out-of-range error to the caller.
                logger.info(
                    'dataset ran out of elements, stopping after %s batches',
                    yielded)
                break
            else:
                yield batch_data
                yielded += 1

    # Explicit close, issued after the with-block above has already exited.
    sess.close()
    def fit_dataset(self, dataset: tf.data.Dataset):
        """Fit the network on a dataset, one batch at a time.

        The dataset is batched with ``self.batch_size``; every full batch is
        stacked into a single array and fed to ``self.train_step`` for
        ``self.epochs`` steps. Every tenth step the summaries are written and
        a checkpoint is saved. A trailing batch smaller than
        ``self.batch_size`` is skipped with a warning, and fitting stops when
        the dataset is exhausted.

        Args:
            dataset: Unbatched dataset whose element components can be
                combined with ``tf.stack``.
        """
        with self._graph.as_default():
            dataset = dataset.batch(self.batch_size).prefetch(self.batch_size)
            iterator = dataset.make_one_shot_iterator()
            with tf.Session() as self._sess:
                # Build the fetch ops once. Calling get_next()/tf.stack inside
                # the loop would add new ops to the graph on every batch.
                batch = iterator.get_next()
                stack_batch_op = tf.stack(batch)

                batch_n = 0
                while True:
                    try:
                        # NOTE(review): invoked before every batch, as in the
                        # original code -- confirm this is intended.
                        self._load_or_init_session()

                        stacked_batch = self._sess.run(stack_batch_op)

                        # if batch size is different from the specified batch size the fitting network won't work due to mismatching shapes
                        if len(stacked_batch) != self.batch_size:
                            logging.warning(
                                "Ignored last batch because it was smaller than the specified batch size. To avoid this choose "
                                "a batch size that is a factor of the dataset size."
                            )
                            break

                        for step in range(self.epochs):
                            self._sess.run(
                                self.train_step,
                                feed_dict={self._x_batch: stacked_batch})

                            # Parentheses are required: `step + 1 % 10` is
                            # parsed as `step + (1 % 10)` and is never zero.
                            if (step + 1) % 10 == 0:
                                self._write_summaries(stacked_batch)
                                self._saver.save(self._sess,
                                                 self.save_file,
                                                 global_step=self.global_step)

                            self._log_progress(batch_n, step, stacked_batch)

                        batch_n += 1

                    except tf.errors.OutOfRangeError:
                        # No more batches in the dataset.
                        break
示例#14
0
def read_dataset(dataset: tf.data.Dataset) -> Tuple[float, int]:
    """Read a dataset to the end and measure how long that takes.

    The dataset is parsed with ``dataset_parser`` in batches of one element
    and prefetched before it is read.

    Args:
        dataset: Dataset of records accepted by ``dataset_parser``.

    Returns:
        Tuple ``(seconds, samples)``: the wall-clock time spent reading and
        the number of elements read.
    """
    parse_and_batch = tf.data.experimental.map_and_batch(
        dataset_parser,
        batch_size=1,
        num_parallel_batches=2,
        drop_remainder=True)
    pipeline = dataset.apply(parse_and_batch)
    pipeline = pipeline.prefetch(tf.data.experimental.AUTOTUNE)
    next_element = pipeline.make_one_shot_iterator().get_next()

    with tf.Session() as sess:
        samples_read = 0
        started_at = time.time()

        try:
            while True:
                sess.run(next_element)
                samples_read += 1
        except tf.errors.OutOfRangeError:
            # End of dataset: stop the clock below.
            pass

        elapsed = time.time() - started_at

    return elapsed, samples_read
示例#15
0
def sample_generator(dataset: tf.data.Dataset, n_workers: int = 4):
    """Expose a TF dataset as a Python generator.

    Args:
        dataset: Dataset to read; the generator ends when it is exhausted.
        n_workers: Argument forwarded to the ``background`` decorator that
            wraps the reading loop.

    Yields:
        The evaluated dataset elements, one at a time.

    Thanks to @velikodniy

    """
    next_element = dataset.make_one_shot_iterator().get_next()
    with tf.Session() as sess:

        @background(n_workers)
        def load():
            while True:
                try:
                    yield sess.run(next_element)
                except tf.errors.OutOfRangeError:
                    break

        yield from load()
示例#16
0
文件: dataset.py 项目: bcho/homework
    def draw_images(self, ds: tf.data.Dataset, n=9):
        """Draw images from dataset.

        Images are laid out three per row, so ``n`` is rounded down to a
        multiple of three before drawing.

        Args:
            ds: dataset whose elements unpack into ``(image, label)``. Each
                image is reshaped to ``(224, 224, 3)``; ``label[0]`` is
                scanned for its first entry equal to 1 (presumably a one-hot
                encoding -- TODO confirm) to pick the title from
                ``self.image_classes``.
            n: first most n images to draw
        """
        import matplotlib.pyplot as plt

        cols = 3
        rows = n // cols
        n = rows * cols
        # squeeze=False keeps `ax` two-dimensional even for a single row;
        # otherwise the 2-D indexing below fails when 3 <= n < 6.
        fig, ax = plt.subplots(ncols=cols, nrows=rows, squeeze=False)

        it = ds.make_one_shot_iterator()
        b = it.get_next()
        i = 0
        with tf.Session() as s:
            while True:
                if i >= n:
                    break
                try:
                    image, label = s.run(b)
                except tf.errors.OutOfRangeError:
                    # Fewer images than requested: draw what is available.
                    break
                class_idx = next(
                    idx for idx, flag in enumerate(label[0]) if flag == 1)
                class_name = self.image_classes[class_idx]

                image_data = np.asarray(image).astype(np.uint8)
                image_data = np.reshape(image_data, (224, 224, 3))
                # Row-major placement driven by the grid width.
                image_fig = ax[i // cols, i % cols]
                image_fig.imshow(image_data)
                image_fig.set_title(class_name)
                i = i + 1
        fig.tight_layout()
示例#17
0
    def draw_images(self, ds: tf.data.Dataset, n=9):
        """Draw images from dataset.

        Images are laid out three per row, so ``n`` is rounded down to a
        multiple of three before drawing.

        Args:
            ds: dataset whose elements unpack into ``(image, label)``. Each
                image is reshaped to ``(224, 224, 3)``; ``label[0]`` is
                scanned for its first entry equal to 1 (presumably a one-hot
                encoding -- TODO confirm) to pick the title from
                ``self.image_classes``.
            n: first most n images to draw
        """
        import matplotlib.pyplot as plt

        cols = 3
        rows = n // cols
        n = rows * cols
        # squeeze=False keeps `ax` two-dimensional even for a single row;
        # otherwise the 2-D indexing below fails when 3 <= n < 6.
        fig, ax = plt.subplots(ncols=cols, nrows=rows, squeeze=False)

        it = ds.make_one_shot_iterator()
        b = it.get_next()
        i = 0
        with tf.Session() as s:
            while True:
                if i >= n:
                    break
                try:
                    image, label = s.run(b)
                except tf.errors.OutOfRangeError:
                    # Fewer images than requested: draw what is available.
                    break
                class_idx = next(idx for idx, flag in enumerate(label[0])
                                 if flag == 1)
                class_name = self.image_classes[class_idx]

                image_data = np.asarray(image).astype(np.uint8)
                image_data = np.reshape(image_data, (224, 224, 3))
                # Row-major placement driven by the grid width.
                image_fig = ax[i // cols, i % cols]
                image_fig.imshow(image_data)
                image_fig.set_title(class_name)
                i = i + 1
        fig.tight_layout()
示例#18
0
def get_first_batch(dataset: tf.data.Dataset):
    """Return the ``get_next()`` result of a one-shot iterator over ``dataset``."""
    return dataset.make_one_shot_iterator().get_next()
示例#19
0
    def get_sub_sampled_data(
            cls, orig_layer: Layer, pruned_layer: Layer, inp_op_names: List,
            orig_layer_db: LayerDatabase, comp_layer_db: LayerDatabase,
            data_set: tf.data.Dataset, batch_size: int,
            num_reconstruction_samples: int) -> (np.ndarray, np.ndarray):

        # pylint: disable=too-many-arguments
        # pylint: disable=too-many-locals
        """
        Get all the input data from pruned model and output data from original model

        For every batch read from the dataset, the output of the layer is taken from the original model and the
        input of the layer from the compressed model; both are transposed from NHWC to NCHW and then sub sampled
        with InputMatchSearch.subsample_data. The per-batch results are stacked vertically.

        :param orig_layer: layer in original model database
        :param pruned_layer: layer in pruned model database
        :param inp_op_names : input Op names, should be same in both models
        :param orig_layer_db: original model database, un-pruned, used to provide the actual outputs
        :param comp_layer_db: comp. model database, this is potentially already pruned in the upstreams layers of given
         layer name
        :param data_set: tf.data.Dataset object
        :param batch_size : batch size
        :param num_reconstruction_samples: The number of reconstruction samples
        :return: input_data, output_data
        :raises StopIteration: if the dataset runs out of data before the required number of batches has been read
        """

        # Grow GPU memory as needed at the cost of fragmentation.
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True  # pylint: disable=no-member

        # create an iterator and iterator.get_next() Op in the same graph as dataset
        # TODO: currently dataset (user provided) and iterator are in the same graph, and the iterator is
        #  being created every time this function is called. Use re-initialize iterator
        sess = tf.compat.v1.Session(graph=data_set._graph, config=config)  # pylint: disable=protected-access

        # try/finally releases the session on every exit path, including the StopIteration raised below
        try:
            with sess.graph.as_default():

                iterator = data_set.make_one_shot_iterator()
                next_element = iterator.get_next()

            # hard coded value
            samples_per_image = 10

            total_num_of_images = int(num_reconstruction_samples /
                                      samples_per_image)

            # number of possible batches - round up
            num_of_batches = math.ceil(total_num_of_images / batch_size)

            all_sub_sampled_inp_data = list()
            all_sub_sampled_out_data = list()

            for _ in range(num_of_batches):

                try:
                    # get the data
                    batch_data = sess.run(next_element)

                    # output data from original model
                    feed_dict = aimet_tensorflow.utils.common.create_input_feed_dict(
                        orig_layer_db.model.graph, inp_op_names, batch_data)
                    output_data = orig_layer_db.model.run(
                        orig_layer.module.outputs[0], feed_dict=feed_dict)

                    # input data from compressed model
                    feed_dict = aimet_tensorflow.utils.common.create_input_feed_dict(
                        comp_layer_db.model.graph, inp_op_names, batch_data)
                    input_data = comp_layer_db.model.run(
                        pruned_layer.module.inputs[0], feed_dict=feed_dict)

                    # get the layer attributes (kernel_size, stride, padding)
                    layer_attributes = aimet_tensorflow.utils.op.conv.get_layer_attributes(
                        sess=orig_layer_db.model,
                        op=orig_layer.module,
                        input_op_names=orig_layer_db.starting_ops,
                        input_shape=orig_layer_db.input_shape)

                    # channels_last (NHWC) to channels_first data format (NCHW - Common format)
                    input_data = np.transpose(input_data, (0, 3, 1, 2))
                    output_data = np.transpose(output_data, (0, 3, 1, 2))

                    # get the sub sampled input and output data
                    sub_sampled_inp_data, sub_sampled_out_data = InputMatchSearch.subsample_data(
                        layer_attributes, input_data, output_data,
                        samples_per_image)
                    all_sub_sampled_inp_data.append(sub_sampled_inp_data)
                    all_sub_sampled_out_data.append(sub_sampled_out_data)

                except tf.errors.OutOfRangeError:

                    raise StopIteration(
                        "There are insufficient batches of data in the provided dataset for the purpose of"
                        " weight reconstruction! Either reduce number of reconstruction samples or increase"
                        " data in dataset")
        finally:
            # close the session
            sess.close()

        # accumulate total sub sampled input and output data
        return np.vstack(all_sub_sampled_inp_data), np.vstack(
            all_sub_sampled_out_data)
示例#20
0
 def __input_fn(dataset: tf.data.Dataset):
     """Return the ``get_next()`` result of a one-shot iterator over ``dataset``."""
     one_shot = dataset.make_one_shot_iterator()
     return one_shot.get_next()
示例#21
0
def dataset_to_tensor(x: tf.data.Dataset):
    """Return the ``get_next()`` result of a one-shot iterator over ``x``."""
    one_shot = x.make_one_shot_iterator()
    return one_shot.get_next()