def _set_up_staging(self, transition):
    """Builds the staging ops used to prefetch the next transition.

    The transition is pushed into a StagingArea ahead of time so that the
    py_func latency is hidden from whoever consumes the returned tensors.

    Args:
      transition: tuple of tf.Tensors with shape
        memory.get_transition_elements().

    Returns:
      prefetched_transition: tuple of tf.Tensors with shape
        memory.get_transition_elements(), read back from the staging area.
    """
    # One dtype per transition element, in the order reported by the memory.
    element_dtypes = []
    for element in self.memory.get_transition_elements():
        element_dtypes.append(element.type)

    staging_area = StagingArea(element_dtypes)

    # The put op is stored on the instance so tests can reach it; the leading
    # underscore signals that users are not expected to run it themselves.
    self._prefetch_batch = staging_area.put(transition)

    def _extra_put():
        return staging_area.put(transition)

    # A second put is issued only while the staging area reports size 0.
    area_is_empty = tf.equal(staging_area.size(), 0)
    initial_prefetch = tf.cond(area_is_empty, _extra_put, tf.no_op)

    # Reading a transition always triggers self._prefetch_batch first (and
    # the extra put as well when the area was empty).
    with tf.control_dependencies([self._prefetch_batch, initial_prefetch]):
        return staging_area.get()
示例#2
0
    def _get_input_tensors(self):
        """Route the wrapped input's tensors through a new StagingArea.

        Records the put op in ``self._stage_ops``, the area in
        ``self._areas`` and the unstaged tensors in ``self._unstage_ops``.

        Returns:
            list of tensors read back from the staging area, carrying the
            static shapes of the corresponding input tensors.
        """
        inputs = self._input.get_input_tensors()

        with self._device_ctx():
            with self.cached_name_scope():
                # Reference-typed tensors (variables) cause trouble when
                # staged, so they are replaced in place by an identity and
                # the area is declared with the base dtype.
                dtypes = []
                for pos, tensor in enumerate(inputs):
                    declared = tensor.dtype
                    base = declared.base_dtype
                    if base != declared:
                        inputs[pos] = tf.identity(tensor)
                    dtypes.append(base)

                # TODO tensorflow/benchmarks use static shapes here,
                # though it doesn't seem to help. We can use it when known.
                # capacity=1 may save some memory: consumers are expected to
                # run slower than the producer.
                stage = StagingArea(dtypes, shapes=None, capacity=1)

            # put & get automatically inherit the name scope from the area
            put_op = stage.put(inputs)
            self._stage_ops.append(put_op)
            self._areas.append(stage)

            outputs = stage.get()
            # With a single element TF may hand back a bare tensor.
            if isinstance(outputs, tf.Tensor):
                outputs = [outputs]

            # Unstaged tensors take over the static shape of their source.
            for source, staged in zip(inputs, outputs):
                staged.set_shape(source.get_shape())
            self._unstage_ops.append(outputs)
            # self._size_ops.append(stage.size())
            return outputs
示例#3
0
    def _get_input_tensors(self):
        """Route the wrapped input's tensors through a new StagingArea.

        The input tensors and the staging area are created under the cached
        name scope and the device context; the put/get ops are created
        outside of both. The put op, the area and the unstaged tensors are
        appended to ``self._stage_ops``, ``self._areas`` and
        ``self._unstage_ops`` respectively.

        Returns:
            list of tensors read back from the staging area, carrying the
            static shapes of the corresponding input tensors.
        """
        with self.cached_name_scope():
            with self._device_ctx():
                inputs = self._input.get_input_tensors()

                # Reference-typed tensors (variables) cause trouble when
                # staged, so they are replaced in place by an identity and
                # the area is declared with the base dtype.
                dtypes = []
                for pos, tensor in enumerate(inputs):
                    declared = tensor.dtype
                    base = declared.base_dtype
                    if base != declared:
                        inputs[pos] = tf.identity(tensor)
                    dtypes.append(base)

                # TODO tensorflow/benchmarks use static shapes here,
                # though it doesn't seem to help. We can use it when known.
                stage = StagingArea(dtypes, shapes=None)

        # put & get automatically inherit the name scope from the area
        put_op = stage.put(inputs)
        self._stage_ops.append(put_op)
        self._areas.append(stage)

        outputs = stage.get()
        # With a single element TF may hand back a bare tensor.
        if isinstance(outputs, tf.Tensor):
            outputs = [outputs]

        # Unstaged tensors take over the static shape of their source.
        for source, staged in zip(inputs, outputs):
            staged.set_shape(source.get_shape())
        self._unstage_ops.append(outputs)
        # self._size_ops.append(stage.size())
        return outputs
示例#4
0
    def _get_input_tensors(self):
        """Route the wrapped input's tensors through a new StagingArea.

        Everything is built inside the cached name scope. The put op, the
        area and the unstaged tensors are appended to ``self._stage_ops``,
        ``self._areas`` and ``self._unstage_ops`` respectively.

        Returns:
            list of tensors read back from the staging area, carrying the
            static shapes of the corresponding input tensors.
        """
        with self.cached_name_scope():
            inputs = self._input.get_input_tensors()

            # Reference-typed tensors (variables) cause trouble when staged,
            # so they are replaced in place by an identity and the area is
            # declared with the base dtype.
            dtypes = []
            for pos, tensor in enumerate(inputs):
                declared = tensor.dtype
                base = declared.base_dtype
                if base != declared:
                    inputs[pos] = tf.identity(tensor)
                dtypes.append(base)

            # TODO tensorflow/benchmarks use static shapes here,
            # though it doesn't seem to help. We can use it when it's known.
            stage = StagingArea(dtypes, shapes=None)
            put_op = stage.put(inputs)
            self._stage_ops.append(put_op)
            self._areas.append(stage)

            outputs = stage.get()
            # With a single element TF may hand back a bare tensor.
            if isinstance(outputs, tf.Tensor):
                outputs = [outputs]

            # Unstaged tensors take over the static shape of their source.
            for source, staged in zip(inputs, outputs):
                staged.set_shape(source.get_shape())
            self._unstage_ops.append(outputs)
            # self._size_ops.append(stage.size())
            return outputs
示例#5
0
    def _setup_staging_areas(self):
        """Create one StagingArea per device in ``self._devices``.

        For every device, the input tensors are fetched under that device
        scope, staged, and read back. The put op is appended to
        ``self._stage_ops``, the area to ``self._areas`` and the list of
        unstaged tensors to ``self._unstage_ops``, so the three lists end up
        with one entry per device, in device order.
        """
        logger.info("Setting up StagingArea for GPU prefetching ...")
        with self.cached_name_scope():
            # The device position is not needed, so iterate devices directly
            # (the previous enumerate index was shadowed by the inner loop).
            for device in self._devices:
                with tf.device(device):
                    inputs = self._input.get_input_tensors()

                    # Putting variables to stagingarea will cause trouble:
                    # reference-typed tensors are replaced in place by an
                    # identity and the area is declared with the base dtype.
                    dtypes = []
                    for pos, tensor in enumerate(inputs):
                        dtype = tensor.dtype
                        if dtype.base_dtype != dtype:  # is reference type
                            inputs[pos] = tf.identity(tensor)
                        dtypes.append(dtype.base_dtype)

                    stage = StagingArea(dtypes, shapes=None)
                    self._stage_ops.append(stage.put(inputs))
                    self._areas.append(stage)
                    outputs = stage.get()
                    # when size=1, TF doesn't return a list
                    if isinstance(outputs, tf.Tensor):
                        outputs = [outputs]
                    # Unstaged tensors take over the static shape of their
                    # source tensor.
                    for vin, vout in zip(inputs, outputs):
                        vout.set_shape(vin.get_shape())
                    self._unstage_ops.append(outputs)
示例#6
0
 def _setup_staging_areas(self):
     """Create one StagingArea per device in ``self._devices``.

     For every device, the input tensors are fetched under that device
     scope, staged, and read back. The put op is appended to
     ``self._stage_ops``, the area to ``self._areas`` and the list of
     unstaged tensors to ``self._unstage_ops``.
     """
     logger.info("Setting up StagingArea for GPU prefetching ...")
     for device in self._devices:
         with tf.device(device):
             inputs = self._input.get_input_tensors()

             # Reference-typed tensors (e.g. variables) must not be staged
             # as-is: replace them in place by an identity and declare the
             # staging area with the base dtype. Tensors that already have
             # a base dtype are left untouched.
             dtypes = []
             for pos, tensor in enumerate(inputs):
                 dtype = tensor.dtype
                 if dtype.base_dtype != dtype:  # is reference type
                     inputs[pos] = tf.identity(tensor)
                 dtypes.append(dtype.base_dtype)

             stage = StagingArea(dtypes, shapes=None)
             self._stage_ops.append(stage.put(inputs))
             self._areas.append(stage)
             outputs = stage.get()
             if isinstance(outputs, tf.Tensor):  # when size=1, TF doesn't return a list
                 outputs = [outputs]
             # Unstaged tensors take over the static shape of their source.
             for vin, vout in zip(inputs, outputs):
                 vout.set_shape(vin.get_shape())
             self._unstage_ops.append(outputs)
示例#7
0
class CpuToDefaultDevStage(object):
  """
  Wraps a StagingArea together with a CPU-side counter variable.

  The put op increments ``staging_size`` before staging ``input_data``;
  the get op is combined with a decrement of ``staging_size`` via
  ``post_control_dependencies``.
  The unstaged tensors are exposed through ``output_as_extern_data``.
  """

  def __init__(self, input_data, names, dtypes, extern_data, data_keys):
    """
    :param dict[str,tf.Tensor] input_data:
    :param list[str] names: data_keys + extra info
    :param list[tf.DType|str] dtypes: corresponds to names
    :param ExternData extern_data:
    :param list[str] data_keys: keys of ``extern_data.data`` to expose
    """
    from TFUtil import post_control_dependencies

    # The device-scope when this gets called is the default device,
    # so everywhere where we want to do it on CPU, we have to specify it explicitly.
    # StagingArea can be used for async CPU->GPU transfer.
    # It will live on the current device by the current device scope, e.g. the GPU.
    self._tf_staging_area = StagingArea(names=names, dtypes=dtypes)

    with tf.device("/cpu:0"):
      self.staging_size = tf.Variable(0, trainable=False, dtype=tf.int32, name="staging_size")
      self._staging_size_init = tf.variables_initializer([self.staging_size])
      # The counter is incremented before the data is put into the staging area.
      with tf.control_dependencies([tf.assign_add(self.staging_size, 1)]):
        self.stage_put_op = self._tf_staging_area.put(input_data)
      get_updates = [tf.assign_sub(self.staging_size, 1)]
    # This should run on the default device (GPU).
    self.stage_get_op = post_control_dependencies(self._tf_staging_area.get(), updates=get_updates)

    # data_keys is documented as a list of key names, so the Data templates
    # have to be looked up in extern_data. Unpacking each entry as
    # (key, data) fails for plain strings.
    output_data = {}
    for entry in data_keys:
      if isinstance(entry, tuple):
        # Tolerate explicit (key, Data) pairs, which is what the previous
        # unpacking expected.
        key, data = entry
      else:
        key, data = entry, extern_data.data[entry]
      output_data[key] = Data(**data.get_kwargs())

    self.output_as_extern_data = ExternData(
      default_input=extern_data.default_input,
      default_target=extern_data.default_target,
      data=output_data)
    # Wire the unstaged tensors in as the placeholders of the copied Data objects.
    for key, data in self.output_as_extern_data.data.items():
      data.placeholder = self.stage_get_op[key]
      data.size_placeholder = {
        axis: self.stage_get_op["%s/size%i" % (key, axis)]
        for axis in data.get_axes_with_size()}

  def loop(self, parent, coord, session):
    """
    Runs the put op repeatedly for as long as ``parent`` reports more data.
    Exceptions are handed to the coordinator via ``coord.stop_on_exception()``.

    :param QueueDataProvider parent:
    :param tf.train.Coordinator coord:
    :param tf.Session session:
    """
    with coord.stop_on_exception():
      while parent.have_more_data(session=session):
        session.run(self.stage_put_op)
示例#8
0
def get_stage_op(next_batch, capacity=4):
    """
    Shortcut to create a staging area and its associated stage operation.

    It is useful for high performance models where we want to stage the next
    batch in parallel with processing the current batch.

    This is just a shortcut to avoid code duplication (it's not a real
    generic implementation right now).

    :param next_batch: a batch as a dictionary mapping names to tensors; the
        names, dtypes and static shapes of the staging area are taken from it
    :param capacity: capacity passed to the StagingArea (defaults to 4, the
        value that used to be hard-coded)
    :return: (stage put op, next_batch_from_stage_area tensor)
    """
    names = list(next_batch.keys())
    # keys() and values() of an unmodified dict iterate in the same order, so
    # names, shapes and dtypes line up element by element.
    tensors = list(next_batch.values())
    shapes = [tensor.shape for tensor in tensors]
    dtypes = [tensor.dtype for tensor in tensors]
    area = StagingArea(dtypes=dtypes, names=names, shapes=shapes,
                       capacity=capacity)
    stage_put_op = area.put(next_batch)
    next_batch_from_stage_area = area.get()
    return stage_put_op, next_batch_from_stage_area
示例#9
0
 def connect(self):
     """Build the batching and staging ops and return the image tensor.

     ``self.single_sample`` is given the static shape ``self.shape``,
     shuffle-batched, passed through ``self._transform`` and routed through
     a float32 StagingArea. All ops are created under ``/cpu:0``.

     Returns:
         the unstaged image tensor, tied to the staging put op through
         ``tf.tuple`` control inputs.
     """
     assert hasattr(self, 'single_sample'), 'Pipeline needs to have single_sample defined before connecting'
     with tf.device('/cpu:0'):
         self.single_sample.set_shape(self.shape)
         batch = tf.train.shuffle_batch(
             [self.single_sample],
             self.batch_size,
             capacity=self.read_batch,
             min_after_dequeue=self.read_batch//8,
             num_threads=16,
             # A rank-4 shape is handed to shuffle_batch with
             # enqueue_many=True.
             enqueue_many=len(self.shape) == 4)
         batch = self._transform(batch)

         stage = StagingArea(dtypes=[tf.float32], shapes=[batch.get_shape()])
         put_op = stage.put([batch])
         staged = stage.get()[0]
         # tf.tuple makes the returned tensor depend on the put op.
         synced = tf.tuple([staged], control_inputs=[put_op])
         return synced[0]
示例#10
0
    def __init__(self, *args, **kwargs):
        """Build the record-reading, preprocessing and staging graph.

        All positional and keyword arguments are forwarded to the parent
        constructor. Afterwards the following attributes are set:
        ``pattern``, ``images``, ``image_producer_op``, ``shape`` (only for
        format 'NCHW' or 'NHWC') and, when ``self.with_labels`` is true,
        ``labels``.
        """
        super(DataFlow, self).__init__(*args, **kwargs)
        self.pattern = 'tf_records_train/train*'

        # Preprocessing is pinned to the CPU.
        with tf.device('/cpu:0'):
            record_input = RecordInput(
                file_pattern=os.path.join(self.data_dir, self.pattern),
                seed=Record_seed,
                parallelism=32,
                buffer_size=4000,
                batch_size=self.batch_size,
                shift_ratio=0,
                name='record_input')
            serialized = record_input.get_yield_op()
            # One scalar tensor per example of the batch.
            serialized = tf.split(serialized, self.batch_size, 0)
            serialized = [tf.reshape(item, []) for item in serialized]

            image_list = []
            label_list = []
            for idx in xrange(self.batch_size):
                example = serialized[idx]
                if self.with_labels:
                    image, label = self.parse_example_proto_and_process(
                        example)
                    label_list.append(label)
                else:
                    image = self.parse_example_proto_and_process(example)
                image_list.append(image)

            labels = label_list
            if self.with_labels:
                labels = tf.parallel_stack(label_list, 0)
                labels = tf.reshape(labels, [self.batch_size])

            batch_shape = [
                self.batch_size, self.output_size,
                self.output_size, self.c_dim
            ]
            images = tf.reshape(tf.parallel_stack(image_list),
                                shape=batch_shape)

            if self.format == 'NCHW':
                # Move the channel axis in front of the spatial axes.
                images = tf.transpose(images, [0, 3, 1, 2])
            images_shape = images.get_shape()

            if self.with_labels:
                # Stage images and labels together in one area.
                stage = StagingArea(
                    dtypes=[tf.float32, tf.int32],
                    shapes=[images_shape, labels.get_shape()])
                image_producer_op = stage.put([images, labels])
                staged = stage.get()
                # tf.tuple makes the outputs depend on the put op.
                synced = tf.tuple([staged[0], staged[1]],
                                  control_inputs=[image_producer_op])
                images = synced[0]
                labels = synced[1]
            else:
                stage = StagingArea(dtypes=[tf.float32],
                                    shapes=[images_shape])
                image_producer_op = stage.put([images])
                staged_images = stage.get()[0]
                synced = tf.tuple([staged_images],
                                  control_inputs=[image_producer_op])
                images = synced[0]

        self.images = images
        self.image_producer_op = image_producer_op
        # Per-example shape, without the batch dimension.
        if self.format == 'NCHW':
            self.shape = [self.c_dim, self.output_size, self.output_size]
        elif self.format == 'NHWC':
            self.shape = [self.output_size, self.output_size, self.c_dim]
        if self.with_labels:
            self.labels = labels
示例#11
0
import os

import tensorflow as tf
from tensorflow.python.ops.data_flow_ops import StagingArea

# Demo script: feed a small range dataset through a StagingArea and read it
# back, alternating get and put between session runs.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

length = 10
dataset_range = tf.data.Dataset.range(length)
# Named `iterator` rather than `iter` so the builtin is not shadowed.
iterator = dataset_range.make_one_shot_iterator()
next_item = iterator.get_next()

with tf.device('cpu:0'):
    a = tf.Variable([1], dtype=tf.int64)

    area = StagingArea(dtypes=[tf.int64])
    area_put = area.put([next_item])
    area_get = area.get()[0]
    # area_size and area_get_put are built but not run by the loop below.
    area_size = area.size()
    area_get_put = tf.tuple([area_get], control_inputs=[area_put])[0]

    # Both additions use the same area_get tensor.
    b = a + area_get
    c = b + area_get

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # first item, just put()
    print('put:', sess.run(area_put))
    # get() & put(): 1 initial put + (length - 1) puts in the loop = length
    # puts in total, one per dataset element.
    for i in range(length - 1):
        # this works as "semicolon"
        print(sess.run(c))
        print('put(); get() =', sess.run(area_put))