Example #1
def testRandomReaderThrowsErrorForInvalidOffset(self):
    records = [self._Record(0, i) for i in range(self._num_records)]
    fn = self._WriteRecordsToFile(records, "uncompressed_records")
    reader = tf_record.tf_record_random_reader(fn)
    with self.assertRaisesRegex(errors_impl.DataLossError,
                                r"corrupted record"):
        reader.read(1)  # 1 is guaranteed to be an invalid offset.
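
A minimal standalone sketch of the same failure mode, outside the test harness; the file path and record payload here are illustrative, not from the original test:

from tensorflow.python.framework import errors_impl
from tensorflow.python.lib.io import tf_record

path = "/tmp/example.tfrecord"
with tf_record.TFRecordWriter(path) as writer:
    writer.write(b"record-0")

reader = tf_record.tf_record_random_reader(path)
try:
    # Offset 1 falls inside the first record's length/CRC header, so the
    # reader reports a corrupted record via DataLossError.
    reader.read(1)
except errors_impl.DataLossError as e:
    print("caught:", e)
finally:
    reader.close()
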
Example #2
  def _load_metadata_files(self):
    """Load and parse metadata files in the dump root.

    Check that all metadata files have a common tfdbg_run_id, and raise
    a ValueError if their tfdbg_run_ids differ.

    Returns:
      A list of metadata file paths in ascending order of their starting
        wall_time timestamp.
    """

    metadata_paths = file_io.get_matching_files(
        os.path.join(self._dump_root, "*%s" % self._METADATA_SUFFIX))
    if not metadata_paths:
      raise ValueError("Cannot find any tfdbg metadata file in directory: %s" %
                       self._dump_root)
    wall_times = []
    run_ids = []
    tensorflow_versions = []
    file_versions = []
    for metadata_path in metadata_paths:
      reader = tf_record.tf_record_random_reader(metadata_path)
      try:
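        # Only the first record (at byte offset 0) of each metadata file
        # is needed here.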
        record = reader.read(0)[0]
        debug_event = debug_event_pb2.DebugEvent.FromString(record)
        wall_times.append(debug_event.wall_time)
        run_ids.append(debug_event.debug_metadata.tfdbg_run_id)
        tensorflow_versions.append(
            debug_event.debug_metadata.tensorflow_version)
        file_versions.append(debug_event.debug_metadata.file_version)
      finally:
        reader.close()
    self._starting_wall_time = wall_times[0]
    self._tfdbg_run_id = run_ids[0]
    self._tensorflow_version = tensorflow_versions[0]
    self._file_version = file_versions[0]
    if len(metadata_paths) == 1:
      # Fast path for a common case (only one DebugEvent file set).
      return metadata_paths

    num_no_id = len([run_id for run_id in run_ids if not run_id])
    if num_no_id:
      paths_without_run_id = [
          metadata_path
          for metadata_path, run_id in zip(metadata_paths, run_ids)
          if not run_id
      ]
      raise ValueError(
          "Found %d tfdbg metadata files and %d of them do not "
          "have tfdbg run ids. The metadata files without run ids are: %s" %
          (len(run_ids), num_no_id, paths_without_run_id))
    elif len(set(run_ids)) != 1:
      raise ValueError(
          "Unexpected: Found multiple (%d) tfdbg2 runs in directory %s" %
          (len(set(run_ids)), self._dump_root))
    # Return the metadata files in ascending order of their timestamps.
    paths_and_timestamps = sorted(
        zip(metadata_paths, wall_times), key=lambda t: t[1])
    self._starting_wall_time = paths_and_timestamps[0][1]
    return [path[0] for path in paths_and_timestamps]
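
The loop above reads only the record at offset 0 of each metadata file. A minimal sketch of that "peek at the first record" idiom; the helper name is illustrative:

from tensorflow.python.lib.io import tf_record

def read_first_record(path):
  """Returns the first record of a TFRecord file without scanning the rest."""
  reader = tf_record.tf_record_random_reader(path)
  try:
    record, _ = reader.read(0)  # read() returns (record, next_offset).
    return record
  finally:
    reader.close()
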
Example #3
def testClosingRandomReaderCausesErrorsForFurtherReading(self):
    records = [self._Record(0, i) for i in range(self._num_records)]
    fn = self._WriteRecordsToFile(records, "uncompressed_records")
    reader = tf_record.tf_record_random_reader(fn)
    reader.close()
    with self.assertRaisesRegex(errors_impl.FailedPreconditionError,
                                r"closed"):
        reader.read(0)
Example #4
def _get_reader(self, file_path):
    """Get a random-access reader for TFRecords file at file_path."""
    file_path = compat.as_bytes(file_path)
    # The following code uses the double-checked locking pattern to optimize
    # the common case (where the reader is already initialized).
    if file_path not in self._readers:  # 1st check, without lock.
        with self._readers_lock:
            if file_path not in self._readers:  # 2nd check, with lock.
                self._readers[file_path] = tf_record.tf_record_random_reader(
                    file_path)
                self._reader_offsets[file_path] = 0
    return self._readers[file_path]
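
For context, a sketch of the state _get_reader assumes; the attribute names come from the snippet, while the surrounding class and its __init__ are assumptions:

import threading

class _TFRecordReaderCache:
    """Hypothetical host class, reconstructed from the attributes used above."""

    def __init__(self):
        self._readers = {}          # bytes file path -> random-access reader
        self._reader_offsets = {}   # bytes file path -> next read offset
        self._readers_lock = threading.Lock()

The double-checked locking works here because dict lookups and single-item assignments are atomic under CPython's GIL; the lock only has to serialize the slow first-time construction of a reader.
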
Example #5
    def testRandomReaderReadingWorks(self):
        """Test read access to random offsets in the TFRecord file."""
        records = [self._Record(0, i) for i in range(self._num_records)]
        fn = self._WriteRecordsToFile(records, "uncompressed_records")
        reader = tf_record.tf_record_random_reader(fn)

        offset = 0
        offsets = [offset]
        # Do a pass of forward reading.
        for i in range(self._num_records):
            record, offset = reader.read(offset)
            self.assertEqual(record, records[i])
            offsets.append(offset)
        # Reading past the end should raise an error.
        with self.assertRaisesRegex(IndexError, r"Out of range.*offset"):
            reader.read(offset)
        # Do a pass of backward reading.
        for i in range(self._num_records - 1, 0, -1):
            record, offset = reader.read(offsets[i])
            self.assertEqual(offset, offsets[i + 1])
            self.assertEqual(record, records[i])
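
The forward pass above collects the boundary offset of every record; the same bookkeeping gives random access by record index. A sketch, with an illustrative helper name:

from tensorflow.python.lib.io import tf_record

def build_offset_index(path):
    """Returns the starting byte offset of each record in a TFRecord file."""
    reader = tf_record.tf_record_random_reader(path)
    offsets = [0]
    try:
        while True:
            _, next_offset = reader.read(offsets[-1])
            offsets.append(next_offset)
    except IndexError:
        pass  # read past the last record
    finally:
        reader.close()
    return offsets[:-1]  # offsets[i] is where record i starts

Record i can then be fetched later with reader.read(offsets[i])[0] on a fresh reader.
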
Example #6
>>> op
<tf.Operation 'fifo_queue_EnqueueMany' type=QueueEnqueueManyV2>
>>> r(op)
>>> r(work_completed)
0
>>> r(produced)
0
>>> kv = reader.read(queue)
>>> kv
ReaderReadV2(key=<tf.Tensor 'ReaderReadV2:0' shape=() dtype=string>, value=<tf.Tensor 'ReaderReadV2:1' shape=() dtype=string>)
>>> r(kv)
ReaderReadV2(key=b'gs://tpu-usc1/datasets/imagenet/validation-00117-of-00128:0', value=b'\n\xc....')
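
Note: `r` in the transcript above is not defined in the snippet; presumably it is a session-run shorthand created earlier in the interactive session, e.g.:

sess = tf.compat.v1.Session()
r = sess.run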


# Immediate record reading:
import tensorflow as tf
from tensorflow.python.lib.io import tf_record
from google.protobuf.json_format import MessageToJson

rdr = tf_record.tf_record_random_reader('gs://tpu-usc1/datasets/imagenet/validation-00117-of-00128')
rec = (b'', 0)           # (record, offset); start at byte offset 0
rec = rdr.read(rec[-1])  # each read returns (record, offset_of_next_record)
rec = rdr.read(rec[-1])
rec = rdr.read(rec[-1])
...

# Decode the current record as a tf.train.Example:
print(MessageToJson(tf.train.Example.FromString(rec[0])))



# Immediate record iteration:
>>> for x in tf_record.tf_record_iterator( 'gs://tpu-usc1/tmp/foo.tfrecord' ): print(x)
... 
b'foo'
b'bar'
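
tf_record_iterator is handy for quick inspection like this, but it is deprecated in the public API; the supported route for pipelines is tf.data.TFRecordDataset. A sketch reading the same file in eager mode:

import tensorflow as tf

for raw in tf.data.TFRecordDataset('gs://tpu-usc1/tmp/foo.tfrecord'):
    print(raw.numpy())  # b'foo', then b'bar'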