Exemplo n.º 1
0
def tf_record_iterator(path, options=None):
    """An iterator that reads the records from a TFRecords file.

  Args:
    path: The path to the TFRecords file.
    options: (optional) A TFRecordOptions object.

  Yields:
    Strings.

  Raises:
    IOError: If `path` cannot be opened for reading.
  """
    compression_type_string = ""
    if options:
        if options.compression_type == TFRecordCompressionType.ZLIB:
            compression_type_string = "ZLIB"

    reader = pywrap_tensorflow.PyRecordReader_New(
        compat.as_bytes(path), 0, compat.as_bytes(compression_type_string))

    if reader is None:
        raise IOError("Could not open %s." % path)
    try:
        while reader.GetNext():
            yield reader.record()
    finally:
        # Close in a finally block so the underlying file handle is released
        # even when the consumer abandons the generator before exhaustion
        # (break / del); the original only closed on full iteration.
        reader.Close()
Exemplo n.º 2
0
 def __init__(self, file_path):
   """Opens a record reader over `file_path`.

   Args:
     file_path: Path to the records file to read. Must not be None.

   Raises:
     ValueError: If `file_path` is None.
     IOError: If a record reader could not be opened for `file_path`.
   """
   if file_path is None:
     raise ValueError('A file path is required')
   # NOTE(review): presumably rewrites the path so reads go through a
   # readahead filesystem — confirm against resource_loader.
   file_path = resource_loader.readahead_file_path(file_path)
   logging.debug('Opening a record reader pointing at %s', file_path)
   # The context manager raises a Python exception on exit if the native
   # call left `status` non-OK.
   with errors.raise_exception_on_not_ok_status() as status:
     self._reader = pywrap_tensorflow.PyRecordReader_New(
         compat.as_bytes(file_path), 0, compat.as_bytes(''), status)
   # Store it for logging purposes.
   self._file_path = file_path
   if not self._reader:
     raise IOError('Failed to open a record reader pointing to %s' % file_path)
Exemplo n.º 3
0
 def Load(self):
     """Yields new events read from the GCS path since the last call.

     Copies the bytes past the last-seen offset (`self._gcs_offset`) into a
     local temp file, iterates its records as Event protos, and advances the
     stored offset so repeated calls read incrementally.

     Yields:
       event_pb2.Event protos parsed from the newly copied records.
     """
     # Create a temp file to hold the contents that we haven't seen yet.
     with tempfile.NamedTemporaryFile(prefix='tf-gcs-') as temp_file:
         name = temp_file.name
         logging.debug('Temp file created at %s', name)
         gcs.CopyContents(self._gcs_path, self._gcs_offset, temp_file)
         reader = pywrap_tensorflow.PyRecordReader_New(
             compat.as_bytes(name), 0)
         try:
             while reader.GetNext():
                 event = event_pb2.Event()
                 event.ParseFromString(reader.record())
                 yield event
             logging.debug('No more events in %s', name)
             # Only advance the offset on a clean full pass; an abandoned
             # generator leaves it unchanged so the records are re-read.
             self._gcs_offset += reader.offset()
         finally:
             # The original never closed the reader; release the handle even
             # if the consumer stops iterating early.
             reader.Close()
Exemplo n.º 4
0
def tf_record_iterator(path):
  """An iterator that reads the records from a TFRecords file.

  Args:
    path: The path to the TFRecords file.

  Yields:
    Strings.

  Raises:
    IOError: If `path` cannot be opened for reading.
  """
  reader = pywrap_tensorflow.PyRecordReader_New(path, 0)
  if reader is None:
    raise IOError("Could not open %s." % path)
  try:
    while reader.GetNext():
      yield reader.record()
  finally:
    # Guarantee the handle is released even if the caller stops iterating
    # before exhaustion; the original leaked it on early exit.
    reader.Close()
Exemplo n.º 5
0
    def _generic_iterator(self, file_path):
        """A helper method that makes an iterator given a debug-events file path.

    Repeated calls to this method create iterators that remember the last
    successful reading position (offset) for each given `file_path`. So the
    iterators are meant for incremental reading of the file.

    Args:
      file_path: Path to the file to create the iterator for.

    Yields:
      A tuple of (offset, debug_event_proto) on each `next()` call.
    """
        # The following code uses the double-checked locking pattern to optimize
        # the common case (where the reader is already initialized).
        if file_path not in self._readers:  # 1st check, without lock.
            with self._readers_lock:
                if file_path not in self._readers:  # 2nd check, with lock.
                    with errors.raise_exception_on_not_ok_status() as status:
                        # TODO(b/136474806): Use tf_record.tf_record_iterator() once it
                        # supports offset.
                        self._readers[
                            file_path] = pywrap_tensorflow.PyRecordReader_New(
                                compat.as_bytes(file_path), 0, b"", status)
        reader = self._readers[file_path]
        while True:
            # Capture the offset before reading so the yielded tuple points at
            # the start of the record that GetNext() is about to consume.
            offset = reader.offset()
            try:
                reader.GetNext()
            except (errors.DataLossError, errors.OutOfRangeError):
                # We ignore partial read exceptions, because a record may be truncated.
                # PyRecordReader holds the offset prior to the failed read, so retrying
                # will succeed.
                break
            yield DebugEventWithOffset(
                debug_event=debug_event_pb2.DebugEvent.FromString(
                    reader.record()),
                offset=offset)
Exemplo n.º 6
0
 def _create_offset_reader(self, file_path, offset):
     """Creates a record reader positioned at `offset` within `file_path`.

     Args:
       file_path: Path to the records file.
       offset: Byte offset at which reading should start.

     Returns:
       A PyRecordReader starting at the given offset, with no compression.
     """
     with errors.raise_exception_on_not_ok_status() as status:
         # TODO(b/136474806): Use tf_record.tf_record_iterator() once it
         # supports offset.
         return pywrap_tensorflow.PyRecordReader_New(
             file_path, offset, b"", status)