def testMalformedSparseFeatures(self):
  """Tests that make_feed_dict rejects malformed SparseFeature values.

  A SparseFeature instance value is an (indices, values) tuple; out-of-range
  indices, mismatched lengths, and tuples of the wrong arity must all raise
  ValueError.
  """
  tensors = {
      'a': tf.sparse_placeholder(tf.int64),
  }
  # Single schema suffices for all cases below (the original built this
  # identical schema twice; the duplicate construction has been removed).
  schema = self.toSchema({
      'a': tf.SparseFeature('idx', 'val', tf.float32, 10)
  })

  # Negative index is out of range.
  instances = [{'a': ([-1, 2], [1.0, 2.0])}]
  with self.assertRaisesRegexp(ValueError, 'has index .* out of range'):
    _ = impl_helper.make_feed_dict(tensors, schema, instances)

  # Index >= declared size (10) is out of range.
  instances = [{'a': ([11, 1], [1.0, 2.0])}]
  with self.assertRaisesRegexp(ValueError, 'has index .* out of range'):
    _ = impl_helper.make_feed_dict(tensors, schema, instances)

  # Indices and values of different lengths.
  instances = [{'a': ([1, 2], [1])}]
  with self.assertRaisesRegexp(ValueError,
                               'indices and values of different lengths'):
    _ = impl_helper.make_feed_dict(tensors, schema, instances)

  # Tuple of the wrong length (3 elements instead of 2).
  instances = [{'a': ([1], [2], [3])}]
  with self.assertRaisesRegexp(ValueError, 'too many values to unpack'):
    _ = impl_helper.make_feed_dict(tensors, schema, instances)
def test_make_feed_dict_error(self, feature_spec, instances, error_msg,
                              error_type=ValueError):
  """Asserts make_feed_dict raises `error_type` matching `error_msg`.

  Parameterized helper: builds input tensors and a schema from
  `feature_spec`, then feeds `instances` and expects the given error.
  """
  input_tensors = tf.parse_example(
      tf.placeholder(tf.string, [None]), feature_spec)
  input_schema = dataset_schema.from_feature_spec(feature_spec)
  with self.assertRaisesRegexp(error_type, error_msg):
    impl_helper.make_feed_dict(input_tensors, input_schema, instances)
def testMakeFeedDictError(self):
  """make_feed_dict raises KeyError when an instance omits a feature."""
  placeholders = {
      'a': tf.placeholder(tf.int64),
      'b': tf.placeholder(tf.int64)
  }
  schema = self.toSchema({
      'a': tf.FixedLenFeature([1], tf.int64),
      'b': tf.FixedLenFeature([1], tf.int64)
  })
  # Feature 'b' is missing from the instance below.
  incomplete_instances = [{'a': 100}]
  with self.assertRaises(KeyError):
    impl_helper.make_feed_dict(placeholders, schema, incomplete_instances)
def _handle_batch(self, batch):
  """Runs the transform graph on `batch` and re-attaches passthrough data.

  Passthrough keys are removed from (copies of) the instances before
  feeding the graph, then merged back into the session.run result.
  """
  batch_size = len(batch)
  self._batch_size_distribution.update(batch_size)
  self._num_instances.inc(batch_size)

  # Mutating PCollection elements is not allowed, so when passthrough keys
  # must be popped out of each instance, operate on shallow copies.
  if self._passthrough_keys:
    batch = [copy.copy(instance) for instance in batch]

  # Extract passthrough data, one list of values per passthrough key.
  passthrough_data = {}
  for key in self._passthrough_keys:
    passthrough_data[key] = [instance.pop(key) for instance in batch]

  feed_dict = impl_helper.make_feed_dict(
      self._graph_state.inputs, self._input_schema, batch)
  try:
    result = self._graph_state.session.run(
        self._graph_state.outputs, feed_dict=feed_dict)
  except Exception as e:
    # Log which outputs were being computed before re-raising.
    tf.logging.error('%s while applying transform function for tensors %s' %
                     (e, self._graph_state.outputs))
    raise

  # Merge the passthrough values back into the fetched result.
  for key in passthrough_data:
    result[key] = passthrough_data[key]
  return result
def process(self, element, saved_model_dir):
  """Runs the given graph to realize the output tensors (i.e. features).

  Runs the graph in a TF session to compute output tensor values for one
  input record at a time (record-by-record sess.run, not batched), reloading
  the graph when `saved_model_dir` differs from the one currently loaded.

  Args:
    element: the element being processed by the DoFn
    saved_model_dir: Directory containing saved model.

  Yields:
    A representation of output features as a dict mapping keys (logical
    column names) to values.
  """
  # Unwrap a WindowedValue-style wrapper if one is present.
  try:
    element = element.element
  except AttributeError:
    pass
  # (Re)load the graph when the model directory changes.
  if saved_model_dir != self._saved_model_dir:
    self._initialize_graph(saved_model_dir)
  feed_dict = impl_helper.make_feed_dict(
      self._inputs, self._input_schema, element)
  fetched = self._session.run(self._outputs, feed_dict=feed_dict)
  yield impl_helper.make_output_dict(self._output_schema, fetched)
def process(self, element, saved_model_dir):
  """Runs the given graph to realize the output tensors (i.e. features).

  Runs the graph in a TF session to compute output tensor values for one
  input record at a time (record-by-record sess.run, not batched). Graph
  state is cached per thread and rebuilt whenever `saved_model_dir` changes.

  Args:
    element: the element being processed by the DoFn
    saved_model_dir: Directory containing saved model.

  Yields:
    A representation of output features as a dict mapping keys (logical
    column names) to values.
  """
  # Fetch (or lazily build) this thread's graph state; rebuild when the
  # saved model directory has changed since the last call on this thread.
  graph_state = getattr(self._thread_local, 'graph_state', None)
  if graph_state is None or graph_state.saved_model_dir != saved_model_dir:
    self._num_graph_loads.inc(1)
    graph_state = self._GraphState(
        saved_model_dir, self._input_schema, self._output_schema)
    self._thread_local.graph_state = graph_state

  feed_dict = impl_helper.make_feed_dict(
      graph_state.inputs, self._input_schema, element)
  fetched_dict = graph_state.session.run(
      graph_state.outputs, feed_dict=feed_dict)
  yield impl_helper.make_output_dict(self._output_schema, fetched_dict)
def test_make_feed_dict(self, feature_spec, instances, feed_dict):
  """make_feed_dict produces the expected tensor-keyed feed dict.

  `feed_dict` arrives keyed by feature name; it is re-keyed by the actual
  placeholder tensors before comparison.
  """
  tensors = tf.parse_example(tf.placeholder(tf.string, [None]), feature_spec)
  schema = dataset_schema.from_feature_spec(feature_spec)
  # Replace the feature-name keys with the corresponding tensors.
  expected = {}
  for name, value in feed_dict.items():
    expected[tensors[name]] = value
  np.testing.assert_equal(
      impl_helper.make_feed_dict(tensors, schema, instances), expected)
def _flush_batch(self):
  """Feeds the buffered batch through the graph and clears the buffer.

  Returns:
    The fetched outputs from session.run for the flushed batch.
  """
  buffered = self._batch
  self._batch_size_distribution.update(len(buffered))
  self._num_instances.inc(len(buffered))
  feed_dict = impl_helper.make_feed_dict(
      self._graph_state.inputs, self._input_schema, buffered)
  # Clear the shared buffer in place (before running, as in the original
  # flow) — the feed_dict already captured everything needed.
  del buffered[:]
  return self._graph_state.session.run(
      self._graph_state.outputs, feed_dict=feed_dict)
def _handle_batch(self, batch):
  """Runs the transform graph on one batch of instances.

  Returns:
    The fetched outputs from session.run for this batch.
  """
  batch_size = len(batch)
  self._batch_size_distribution.update(batch_size)
  self._num_instances.inc(batch_size)
  feed_dict = impl_helper.make_feed_dict(
      self._graph_state.inputs, self._input_schema, batch)
  try:
    return self._graph_state.session.run(
        self._graph_state.outputs, feed_dict=feed_dict)
  except Exception as e:
    # Log which outputs were being computed before re-raising.
    tf.logging.error('%s while applying transform function for tensors %s' %
                     (e, self._graph_state.outputs))
    raise
def testMakeFeedDict(self):
  """Tests make_feed_dict for dense, sparse, numpy-typed and empty values."""
  tensors = {
      'a': tf.placeholder(tf.int64),
      'b': tf.placeholder(tf.float32),
      'c': tf.placeholder(tf.float32),
      'd': tf.placeholder(tf.float32),
      'e': tf.sparse_placeholder(tf.string),
      'f': tf.sparse_placeholder(tf.float32)
  }
  schema = self.toSchema({
      'a': tf.FixedLenFeature(None, tf.int64),
      'b': tf.FixedLenFeature([], tf.float32),
      'c': tf.FixedLenFeature([1], tf.float32),
      'd': tf.FixedLenFeature([2, 2], tf.float32),
      'e': tf.VarLenFeature(tf.string),
      'f': tf.SparseFeature('idx', 'val', tf.float32, 10)
  })

  # Feed some dense and sparse values.
  # SparseFeature 'f' is given as an (indices, values) tuple per instance.
  instances = [{
      'a': 100,
      'b': 1.0,
      'c': [2.0],
      'd': [[1.0, 2.0], [3.0, 4.0]],
      'e': ['doe', 'a', 'deer'],
      'f': ([2, 4, 8], [10.0, 20.0, 30.0])
  }, {
      'a': 100,
      'b': 2.0,
      'c': [4.0],
      'd': [[5.0, 6.0], [7.0, 8.0]],
      'e': ['a', 'female', 'deer'],
      'f': ([], [])
  }]
  feed_dict = impl_helper.make_feed_dict(tensors, schema, instances)
  # Every placeholder must get exactly one feed value.
  self.assertSetEqual(set(six.iterkeys(feed_dict)),
                      set(six.itervalues(tensors)))
  self.assertAllEqual(feed_dict[tensors['a']], [100, 100])
  self.assertAllEqual(feed_dict[tensors['b']], [1.0, 2.0])
  self.assertAllEqual(feed_dict[tensors['c']], [[2.0], [4.0]])
  self.assertAllEqual(
      feed_dict[tensors['d']],
      [[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]])
  # VarLenFeature 'e' gets dense_shape (batch, longest value list).
  self.assertSparseValuesEqual(
      feed_dict[tensors['e']],
      tf.SparseTensorValue(
          indices=[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)],
          values=['doe', 'a', 'deer', 'a', 'female', 'deer'],
          dense_shape=(2, 3)))
  # SparseFeature 'f' gets dense_shape (batch, declared size 10).
  self.assertSparseValuesEqual(
      feed_dict[tensors['f']],
      tf.SparseTensorValue(indices=[(0, 2), (0, 4), (0, 8)],
                           values=[10.0, 20.0, 30.0],
                           dense_shape=(2, 10)))

  # Feed numpy versions of everything.
  instances = [{
      'a': np.int64(100),
      'b': np.array(1.0, np.float32),
      'c': np.array([2.0], np.float32),
      'd': np.array([[1.0, 2.0], [3.0, 4.0]], np.float32),
      'e': ['doe', 'a', 'deer'],
      'f': (np.array([2, 4, 8]), np.array([10.0, 20.0, 30.0])),
  }, {
      'a': np.int64(100),
      'b': np.array(2.0, np.float32),
      'c': np.array([4.0], np.float32),
      'd': np.array([[5.0, 6.0], [7.0, 8.0]], np.float32),
      'e': ['a', 'female', 'deer'],
      'f': (np.array([], np.int32), np.array([], np.float32))
  }]
  feed_dict = impl_helper.make_feed_dict(tensors, schema, instances)
  # Expected results are identical to the pure-Python values above.
  self.assertSetEqual(set(six.iterkeys(feed_dict)),
                      set(six.itervalues(tensors)))
  self.assertAllEqual(feed_dict[tensors['a']], [100, 100])
  self.assertAllEqual(feed_dict[tensors['b']], [1.0, 2.0])
  self.assertAllEqual(feed_dict[tensors['c']], [[2.0], [4.0]])
  self.assertAllEqual(
      feed_dict[tensors['d']],
      [[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]])
  self.assertSparseValuesEqual(
      feed_dict[tensors['e']],
      tf.SparseTensorValue(
          indices=[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)],
          values=['doe', 'a', 'deer', 'a', 'female', 'deer'],
          dense_shape=(2, 3)))
  self.assertSparseValuesEqual(
      feed_dict[tensors['f']],
      tf.SparseTensorValue(indices=[(0, 2), (0, 4), (0, 8)],
                           values=[10.0, 20.0, 30.0],
                           dense_shape=(2, 10)))

  # Feed some empty sparse values
  instances = [{
      'a': 100,
      'b': 5.0,
      'c': [1.0],
      'd': [[1.0, 2.0], [3.0, 4.0]],
      'e': [],
      'f': ([], [])
  }]
  feed_dict = impl_helper.make_feed_dict(tensors, schema, instances)
  # Empty sparse values yield a 0x2 indices array; the VarLenFeature's
  # second dense_shape dim collapses to 0 while the SparseFeature keeps 10.
  self.assertSparseValuesEqual(
      feed_dict[tensors['e']],
      tf.SparseTensorValue(indices=np.empty([0, 2], np.int64), values=[],
                           dense_shape=(1, 0)))
  self.assertSparseValuesEqual(
      feed_dict[tensors['f']],
      tf.SparseTensorValue(indices=np.empty([0, 2], np.int64), values=[],
                           dense_shape=(1, 10)))
def testMakeFeedDict(self):
  """Tests make_feed_dict for dense, VarLen, SparseFeature and empty values.

  In this variant the SparseFeature 'd' is fed via its separate 'idx' and
  'val' instance keys rather than an (indices, values) tuple.
  """
  tensors = {
      'a': tf.placeholder(tf.int64),
      'b': tf.placeholder(tf.float32),
      'c': tf.sparse_placeholder(tf.string),
      'd': tf.sparse_placeholder(tf.float32)
  }
  schema = self.toSchema({
      'a': tf.FixedLenFeature(None, tf.int64),
      'b': tf.FixedLenFeature([2, 2], tf.float32),
      'c': tf.VarLenFeature(tf.string),
      'd': tf.SparseFeature('idx', 'val', tf.float32, 10)
  })

  # Feed some dense and sparse values.
  instances = [{
      'a': 100,
      'b': [[1.0, 2.0], [3.0, 4.0]],
      'c': ['doe', 'a', 'deer'],
      'idx': [2, 4, 8],
      'val': [10.0, 20.0, 30.0]
  }, {
      'a': 100,
      'b': [[5.0, 6.0], [7.0, 8.0]],
      'c': ['a', 'female', 'deer'],
      'idx': [],
      'val': []
  }]
  feed_dict = impl_helper.make_feed_dict(tensors, schema, instances)
  # Every placeholder must get exactly one feed value.
  self.assertSetEqual(set(feed_dict.keys()), set(tensors.values()))
  self.assertAllEqual(feed_dict[tensors['a']], [100, 100])
  self.assertAllEqual(
      feed_dict[tensors['b']],
      [[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]])
  # VarLenFeature 'c' gets dense_shape (batch, longest value list).
  self.assertSparseValuesEqual(
      feed_dict[tensors['c']],
      tf.SparseTensorValue(
          indices=[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2)],
          values=['doe', 'a', 'deer', 'a', 'female', 'deer'],
          dense_shape=(2, 3)))
  # SparseFeature 'd' gets dense_shape (batch, declared size 10).
  self.assertSparseValuesEqual(
      feed_dict[tensors['d']],
      tf.SparseTensorValue(indices=[(0, 2), (0, 4), (0, 8)],
                           values=[10.0, 20.0, 30.0],
                           dense_shape=(2, 10)))

  # Feed some empty sparse values
  instances = [{
      'a': 100,
      'b': [[1.0, 2.0], [3.0, 4.0]],
      'c': [],
      'idx': [],
      'val': []
  }]
  feed_dict = impl_helper.make_feed_dict(tensors, schema, instances)
  # Empty sparse values yield a 0x2 indices array; the VarLenFeature's
  # second dense_shape dim collapses to 0 while the SparseFeature keeps 10.
  self.assertSparseValuesEqual(
      feed_dict[tensors['c']],
      tf.SparseTensorValue(indices=np.empty([0, 2], np.int64), values=[],
                           dense_shape=(1, 0)))
  self.assertSparseValuesEqual(
      feed_dict[tensors['d']],
      tf.SparseTensorValue(indices=np.empty([0, 2], np.int64), values=[],
                           dense_shape=(1, 10)))