Example #1
0
  def testMalformedSparseFeatures(self):
    """make_feed_dict must reject malformed tf.SparseFeature values.

    Covers out-of-range indices (negative and >= declared size),
    mismatched index/value lengths, and tuples of the wrong arity.
    """
    tensors = {
        'a': tf.sparse_placeholder(tf.int64),
    }
    # One schema covers every case below; the original rebuilt an
    # identical schema mid-test, which was redundant.
    schema = self.toSchema({
        'a': tf.SparseFeature('idx', 'val', tf.float32, 10)
    })

    # Negative index.
    instances = [{'a': ([-1, 2], [1.0, 2.0])}]
    with self.assertRaisesRegexp(
        ValueError, 'has index .* out of range'):
      _ = impl_helper.make_feed_dict(tensors, schema, instances)

    # Index >= the feature's declared size (10).
    instances = [{'a': ([11, 1], [1.0, 2.0])}]
    with self.assertRaisesRegexp(
        ValueError, 'has index .* out of range'):
      _ = impl_helper.make_feed_dict(tensors, schema, instances)

    # Indices and values of different lengths.
    instances = [{'a': ([1, 2], [1])}]
    with self.assertRaisesRegexp(
        ValueError, 'indices and values of different lengths'):
      _ = impl_helper.make_feed_dict(tensors, schema, instances)

    # Tuple of the wrong length (3 elements instead of 2).
    instances = [{'a': ([1], [2], [3])}]
    with self.assertRaisesRegexp(
        ValueError, 'too many values to unpack'):
      _ = impl_helper.make_feed_dict(tensors, schema, instances)
Example #2
0
 def test_make_feed_dict_error(self,
                               feature_spec,
                               instances,
                               error_msg,
                               error_type=ValueError):
     """Feeding `instances` under `feature_spec` must raise `error_type`."""
     schema = dataset_schema.from_feature_spec(feature_spec)
     serialized_input = tf.placeholder(tf.string, [None])
     tensors = tf.parse_example(serialized_input, feature_spec)
     with self.assertRaisesRegexp(error_type, error_msg):
         impl_helper.make_feed_dict(tensors, schema, instances)
Example #3
0
 def testMakeFeedDictError(self):
     """make_feed_dict raises KeyError when an instance omits a feature."""
     schema = self.toSchema({
         'a': tf.FixedLenFeature([1], tf.int64),
         'b': tf.FixedLenFeature([1], tf.int64)
     })
     tensors = {
         'a': tf.placeholder(tf.int64),
         'b': tf.placeholder(tf.int64)
     }
     # 'b' is required by the schema but absent from the instance.
     instances = [{'a': 100}]
     with self.assertRaises(KeyError):
         impl_helper.make_feed_dict(tensors, schema, instances)
Example #4
0
  def _handle_batch(self, batch):
    """Runs the TF graph on `batch` and returns the fetched outputs.

    Passthrough keys are stripped from (copies of) the instances before
    feeding, then re-attached to the result unchanged.
    """
    batch_len = len(batch)
    self._batch_size_distribution.update(batch_len)
    self._num_instances.inc(batch_len)

    # PCollection elements must not be mutated, so copy each instance
    # before popping the passthrough keys out of it.
    if self._passthrough_keys:
      batch = [copy.copy(instance) for instance in batch]
    passthrough_data = {}
    for key in self._passthrough_keys:
      passthrough_data[key] = [instance.pop(key) for instance in batch]

    feed_dict = impl_helper.make_feed_dict(self._graph_state.inputs,
                                           self._input_schema, batch)

    try:
      result = self._graph_state.session.run(
          self._graph_state.outputs, feed_dict=feed_dict)
    except Exception as e:
      # Log which output tensors failed, then re-raise the original error.
      tf.logging.error('%s while applying transform function for tensors %s' %
                       (e, self._graph_state.outputs))
      raise

    for key, value in six.iteritems(passthrough_data):
      result[key] = value

    return result
Example #5
0
    def process(self, element, saved_model_dir):
        """Realizes the output tensors (i.e. features) for one record.

        Runs the graph in a TF session to compute the output values of the
        tensors for a single input row. Due to the record-by-record nature
        of beam, sess.run() operates on individual record tensors rather
        than batched tensors.

        Args:
          element: the element being processed by the DoFn
          saved_model_dir: Directory containing saved model.

        Yields:
          A representation of output features as a dict mapping keys
          (logical column names) to values.
        """
        # Some beam runners hand the DoFn a wrapper object; unwrap the
        # payload when an `element` attribute is present.
        element = getattr(element, 'element', element)
        # (Re)load the graph whenever the model directory changes.
        if saved_model_dir != self._saved_model_dir:
            self._initialize_graph(saved_model_dir)
        feed_dict = impl_helper.make_feed_dict(self._inputs,
                                               self._input_schema, element)
        outputs = self._session.run(self._outputs, feed_dict=feed_dict)
        yield impl_helper.make_output_dict(self._output_schema, outputs)
Example #6
0
    def process(self, element, saved_model_dir):
        """Realizes the output tensors (i.e. features) for one record.

        Runs the graph in a TF session to compute the output values of the
        tensors for a single input row. Due to the record-by-record nature
        of beam, sess.run() operates on individual record tensors rather
        than batched tensors.

        Args:
          element: the element being processed by the DoFn
          saved_model_dir: Directory containing saved model.

        Yields:
          A representation of output features as a dict mapping keys
          (logical column names) to values.
        """
        # Load (or reload) the graph when this thread has no cached state
        # yet, or when the model directory has changed under it.
        state = getattr(self._thread_local, 'graph_state', None)
        if state is None or state.saved_model_dir != saved_model_dir:
            self._num_graph_loads.inc(1)
            state = self._GraphState(saved_model_dir, self._input_schema,
                                     self._output_schema)
            self._thread_local.graph_state = state

        feed_dict = impl_helper.make_feed_dict(state.inputs,
                                               self._input_schema, element)
        fetched = state.session.run(state.outputs, feed_dict=feed_dict)
        yield impl_helper.make_output_dict(self._output_schema, fetched)
Example #7
0
 def test_make_feed_dict(self, feature_spec, instances, feed_dict):
   """make_feed_dict produces the expected tensor-keyed feed dict."""
   schema = dataset_schema.from_feature_spec(feature_spec)
   tensors = tf.parse_example(tf.placeholder(tf.string, [None]), feature_spec)
   # The parameterized feed_dict is keyed by feature name; re-key it by
   # the actual parsed tensors before comparing.
   expected = {tensors[name]: value for name, value in feed_dict.items()}
   np.testing.assert_equal(
       impl_helper.make_feed_dict(tensors, schema, instances), expected)
Example #8
0
    def _flush_batch(self):
        """Feeds the accumulated batch through the graph and clears it."""
        batch = self._batch
        self._batch_size_distribution.update(len(batch))
        self._num_instances.inc(len(batch))

        feed_dict = impl_helper.make_feed_dict(self._graph_state.inputs,
                                               self._input_schema, batch)
        # Empty the shared batch list in place once its data has been
        # captured in feed_dict.
        del batch[:]

        return self._graph_state.session.run(self._graph_state.outputs,
                                             feed_dict=feed_dict)
Example #9
0
  def _handle_batch(self, batch):
    """Runs the transform graph on `batch`, returning the fetched outputs."""
    num_elements = len(batch)
    self._batch_size_distribution.update(num_elements)
    self._num_instances.inc(num_elements)

    feed_dict = impl_helper.make_feed_dict(self._graph_state.inputs,
                                           self._input_schema, batch)

    try:
      return self._graph_state.session.run(
          self._graph_state.outputs, feed_dict=feed_dict)
    except Exception as e:
      # Log which output tensors were being computed to aid debugging,
      # then re-raise the original error unchanged.
      tf.logging.error('%s while applying transform function for tensors %s' %
                       (e, self._graph_state.outputs))
      raise
Example #10
0
    def testMakeFeedDict(self):
        """End-to-end check of impl_helper.make_feed_dict.

        Exercises scalar, dense, varlen and sparse features fed as plain
        Python values, as numpy values, and with empty sparse inputs.
        """
        tensors = {
            'a': tf.placeholder(tf.int64),
            'b': tf.placeholder(tf.float32),
            'c': tf.placeholder(tf.float32),
            'd': tf.placeholder(tf.float32),
            'e': tf.sparse_placeholder(tf.string),
            'f': tf.sparse_placeholder(tf.float32)
        }
        schema = self.toSchema({
            'a':
            tf.FixedLenFeature(None, tf.int64),
            'b':
            tf.FixedLenFeature([], tf.float32),
            'c':
            tf.FixedLenFeature([1], tf.float32),
            'd':
            tf.FixedLenFeature([2, 2], tf.float32),
            'e':
            tf.VarLenFeature(tf.string),
            'f':
            tf.SparseFeature('idx', 'val', tf.float32, 10)
        })

        # Feed some dense and sparse values.
        # 'f' is a SparseFeature fed as an (indices, values) tuple.
        instances = [{
            'a': 100,
            'b': 1.0,
            'c': [2.0],
            'd': [[1.0, 2.0], [3.0, 4.0]],
            'e': ['doe', 'a', 'deer'],
            'f': ([2, 4, 8], [10.0, 20.0, 30.0])
        }, {
            'a': 100,
            'b': 2.0,
            'c': [4.0],
            'd': [[5.0, 6.0], [7.0, 8.0]],
            'e': ['a', 'female', 'deer'],
            'f': ([], [])
        }]

        feed_dict = impl_helper.make_feed_dict(tensors, schema, instances)
        # The feed dict must be keyed by the tensors themselves.
        self.assertSetEqual(set(six.iterkeys(feed_dict)),
                            set(six.itervalues(tensors)))
        self.assertAllEqual(feed_dict[tensors['a']], [100, 100])
        self.assertAllEqual(feed_dict[tensors['b']], [1.0, 2.0])
        self.assertAllEqual(feed_dict[tensors['c']], [[2.0], [4.0]])
        self.assertAllEqual(
            feed_dict[tensors['d']],
            [[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]])
        # VarLenFeature: indices enumerate positions within each instance.
        self.assertSparseValuesEqual(
            feed_dict[tensors['e']],
            tf.SparseTensorValue(
                indices=[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)],
                values=['doe', 'a', 'deer', 'a', 'female', 'deer'],
                dense_shape=(2, 3)))
        # SparseFeature: indices come from the instance's own index list;
        # the second dim of dense_shape is the declared size (10).
        self.assertSparseValuesEqual(
            feed_dict[tensors['f']],
            tf.SparseTensorValue(indices=[(0, 2), (0, 4), (0, 8)],
                                 values=[10.0, 20.0, 30.0],
                                 dense_shape=(2, 10)))

        # Feed numpy versions of everything.
        instances = [{
            'a': np.int64(100),
            'b': np.array(1.0, np.float32),
            'c': np.array([2.0], np.float32),
            'd': np.array([[1.0, 2.0], [3.0, 4.0]], np.float32),
            'e': ['doe', 'a', 'deer'],
            'f': (np.array([2, 4, 8]), np.array([10.0, 20.0, 30.0])),
        }, {
            'a': np.int64(100),
            'b': np.array(2.0, np.float32),
            'c': np.array([4.0], np.float32),
            'd': np.array([[5.0, 6.0], [7.0, 8.0]], np.float32),
            'e': ['a', 'female', 'deer'],
            'f': (np.array([], np.int32), np.array([], np.float32))
        }]

        # The numpy-fed results must match the plain-Python-fed results.
        feed_dict = impl_helper.make_feed_dict(tensors, schema, instances)
        self.assertSetEqual(set(six.iterkeys(feed_dict)),
                            set(six.itervalues(tensors)))
        self.assertAllEqual(feed_dict[tensors['a']], [100, 100])
        self.assertAllEqual(feed_dict[tensors['b']], [1.0, 2.0])
        self.assertAllEqual(feed_dict[tensors['c']], [[2.0], [4.0]])
        self.assertAllEqual(
            feed_dict[tensors['d']],
            [[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]])
        self.assertSparseValuesEqual(
            feed_dict[tensors['e']],
            tf.SparseTensorValue(
                indices=[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)],
                values=['doe', 'a', 'deer', 'a', 'female', 'deer'],
                dense_shape=(2, 3)))
        self.assertSparseValuesEqual(
            feed_dict[tensors['f']],
            tf.SparseTensorValue(indices=[(0, 2), (0, 4), (0, 8)],
                                 values=[10.0, 20.0, 30.0],
                                 dense_shape=(2, 10)))

        # Feed some empty sparse values
        instances = [{
            'a': 100,
            'b': 5.0,
            'c': [1.0],
            'd': [[1.0, 2.0], [3.0, 4.0]],
            'e': [],
            'f': ([], [])
        }]
        feed_dict = impl_helper.make_feed_dict(tensors, schema, instances)
        # Empty sparse inputs produce a [0, 2]-shaped (i.e. empty) index
        # matrix rather than an empty 1-D array.
        self.assertSparseValuesEqual(
            feed_dict[tensors['e']],
            tf.SparseTensorValue(indices=np.empty([0, 2], np.int64),
                                 values=[],
                                 dense_shape=(1, 0)))
        self.assertSparseValuesEqual(
            feed_dict[tensors['f']],
            tf.SparseTensorValue(indices=np.empty([0, 2], np.int64),
                                 values=[],
                                 dense_shape=(1, 10)))
Example #11
0
    def testMakeFeedDict(self):
        """Checks make_feed_dict over dense, varlen and sparse features."""
        tensors = {
            'a': tf.placeholder(tf.int64),
            'b': tf.placeholder(tf.float32),
            'c': tf.sparse_placeholder(tf.string),
            'd': tf.sparse_placeholder(tf.float32)
        }
        schema = self.toSchema({
            'a': tf.FixedLenFeature(None, tf.int64),
            'b': tf.FixedLenFeature([2, 2], tf.float32),
            'c': tf.VarLenFeature(tf.string),
            'd': tf.SparseFeature('idx', 'val', tf.float32, 10)
        })

        # Two instances mixing dense, variable-length and sparse values;
        # 'd' is assembled from the separate 'idx' and 'val' columns.
        instances = [{
            'a': 100,
            'b': [[1.0, 2.0], [3.0, 4.0]],
            'c': ['doe', 'a', 'deer'],
            'idx': [2, 4, 8],
            'val': [10.0, 20.0, 30.0]
        }, {
            'a': 100,
            'b': [[5.0, 6.0], [7.0, 8.0]],
            'c': ['a', 'female', 'deer'],
            'idx': [],
            'val': []
        }]

        result = impl_helper.make_feed_dict(tensors, schema, instances)
        self.assertSetEqual(set(result), set(tensors.values()))
        self.assertAllEqual(result[tensors['a']], [100, 100])
        self.assertAllEqual(
            result[tensors['b']],
            [[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]])
        self.assertSparseValuesEqual(
            result[tensors['c']],
            tf.SparseTensorValue(
                indices=[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)],
                values=['doe', 'a', 'deer', 'a', 'female', 'deer'],
                dense_shape=(2, 3)))
        self.assertSparseValuesEqual(
            result[tensors['d']],
            tf.SparseTensorValue(
                indices=[(0, 2), (0, 4), (0, 8)],
                values=[10.0, 20.0, 30.0],
                dense_shape=(2, 10)))

        # Empty sparse inputs yield [0, 2]-shaped (empty) index matrices.
        instances = [{
            'a': 100,
            'b': [[1.0, 2.0], [3.0, 4.0]],
            'c': [],
            'idx': [],
            'val': []
        }]
        result = impl_helper.make_feed_dict(tensors, schema, instances)
        self.assertSparseValuesEqual(
            result[tensors['c']],
            tf.SparseTensorValue(
                indices=np.empty([0, 2], np.int64),
                values=[],
                dense_shape=(1, 0)))
        self.assertSparseValuesEqual(
            result[tensors['d']],
            tf.SparseTensorValue(
                indices=np.empty([0, 2], np.int64),
                values=[],
                dense_shape=(1, 10)))