def __init__(self,
             coder=coders.BytesCoder(),
             compression_type=CompressionTypes.AUTO,
             **kwargs):
  """Initialize the ``ReadAllFromTFRecord`` transform.

  Args:
    coder: Coder used to decode each record.
    compression_type: Used to handle compressed input files. Default value
      is CompressionTypes.AUTO, in which case the file_path's extension will
      be used to detect the compression.
    **kwargs: optional args dictionary. These are passed through to the
      parent constructor.
  """
  super(ReadAllFromTFRecord, self).__init__(**kwargs)
  source_from_file = partial(
      _create_tfrecordio_source,
      compression_type=compression_type,
      coder=coder)
  # Desired and min bundle sizes do not matter since TFRecord files are
  # unsplittable.
  self._read_all_files = ReadAllFiles(
      splittable=False,
      compression_type=compression_type,
      desired_bundle_size=0,
      min_bundle_size=0,
      source_from_file=source_from_file)
def __init__(self,
             coder=coders.BytesCoder(),
             compression_type=CompressionTypes.AUTO,
             with_filename=False):
  """Initialize the ``ReadAllFromTFRecord`` transform.

  Args:
    coder: Coder used to decode each record.
    compression_type: Used to handle compressed input files. Default value
      is CompressionTypes.AUTO, in which case the file_path's extension will
      be used to detect the compression.
    with_filename: If True, returns a Key Value with the key being the file
      name and the value being the actual data. If False, it only returns
      the data.
  """
  super(ReadAllFromTFRecord, self).__init__()
  source_from_file = partial(
      _create_tfrecordio_source,
      compression_type=compression_type,
      coder=coder)
  # Desired and min bundle sizes do not matter since TFRecord files are
  # unsplittable.
  self._read_all_files = ReadAllFiles(
      splittable=False,
      compression_type=compression_type,
      desired_bundle_size=0,
      min_bundle_size=0,
      source_from_file=source_from_file,
      with_filename=with_filename)
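# A minimal usage sketch for ReadAllFromTFRecord, assuming Apache Beam is
# installed. The pipeline, step labels, and file pattern below are
# illustrative placeholders, not part of the transform's API.
import apache_beam as beam
from apache_beam.io.tfrecordio import ReadAllFromTFRecord

with beam.Pipeline() as pipeline:
  records = (
      pipeline
      # The input PCollection holds file patterns, not records.
      | 'FilePatterns' >> beam.Create(['/tmp/data/input-*.tfrecord'])
      # With with_filename=True (newer signature above), each output element
      # is a (file name, record bytes) pair; with the default False, only
      # the raw record bytes are emitted.
      | 'ReadRecords' >> ReadAllFromTFRecord(with_filename=True))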
def __init__(
    self,
    min_bundle_size=0,
    desired_bundle_size=DEFAULT_DESIRED_BUNDLE_SIZE,
    compression_type=CompressionTypes.AUTO,
    strip_trailing_newlines=True,
    coder=coders.StrUtf8Coder(),  # type: coders.Coder
    skip_header_lines=0,
    with_filename=False,
    delimiter=None,
    escapechar=None,
    **kwargs):
  """Initialize the ``ReadAllFromText`` transform.

  Args:
    min_bundle_size: Minimum size of bundles that should be generated when
      splitting this source into bundles. See ``FileBasedSource`` for more
      details.
    desired_bundle_size: Desired size of bundles that should be generated
      when splitting this source into bundles. See ``FileBasedSource`` for
      more details.
    compression_type: Used to handle compressed input files. Typical value
      is ``CompressionTypes.AUTO``, in which case the underlying file_path's
      extension will be used to detect the compression.
    strip_trailing_newlines: Indicates whether this source should remove
      the newline char in each line it reads before decoding that line.
    coder: Coder used to decode each line.
    skip_header_lines: Number of header lines to skip. The same number is
      skipped from each source file. Must be 0 or higher. A large number of
      skipped lines might impact performance.
    with_filename: If True, returns a Key Value with the key being the file
      name and the value being the actual data. If False, it only returns
      the data.
    delimiter (bytes): Optional delimiter used to split records. Must not
      self-overlap, because self-overlapping delimiters cause ambiguous
      parsing.
    escapechar (bytes): Optional single byte used to escape the records
      delimiter; it can also escape itself.
    **kwargs: optional args dictionary. These are passed through to the
      parent constructor.
  """
  super().__init__(**kwargs)
  source_from_file = partial(
      _create_text_source,
      min_bundle_size=min_bundle_size,
      compression_type=compression_type,
      strip_trailing_newlines=strip_trailing_newlines,
      coder=coder,
      skip_header_lines=skip_header_lines,
      delimiter=delimiter,
      escapechar=escapechar)
  self._desired_bundle_size = desired_bundle_size
  self._min_bundle_size = min_bundle_size
  self._compression_type = compression_type
  self._read_all_files = ReadAllFiles(
      True,
      compression_type,
      desired_bundle_size,
      min_bundle_size,
      source_from_file,
      with_filename)
def __init__(
    self,
    min_bundle_size=0,
    desired_bundle_size=DEFAULT_DESIRED_BUNDLE_SIZE,
    compression_type=CompressionTypes.AUTO,
    strip_trailing_newlines=True,
    coder=coders.StrUtf8Coder(),  # type: coders.Coder
    skip_header_lines=0,
    **kwargs):
  """Initialize the ``ReadAllFromText`` transform.

  Args:
    min_bundle_size: Minimum size of bundles that should be generated when
      splitting this source into bundles. See ``FileBasedSource`` for more
      details.
    desired_bundle_size: Desired size of bundles that should be generated
      when splitting this source into bundles. See ``FileBasedSource`` for
      more details.
    compression_type: Used to handle compressed input files. Typical value
      is ``CompressionTypes.AUTO``, in which case the underlying file_path's
      extension will be used to detect the compression.
    strip_trailing_newlines: Indicates whether this source should remove
      the newline char in each line it reads before decoding that line.
    coder: Coder used to decode each line.
    skip_header_lines: Number of header lines to skip. The same number is
      skipped from each source file. Must be 0 or higher. A large number of
      skipped lines might impact performance.
    **kwargs: optional args dictionary. These are passed through to the
      parent constructor.
  """
  super(ReadAllFromText, self).__init__(**kwargs)
  source_from_file = partial(
      _create_text_source,
      min_bundle_size=min_bundle_size,
      compression_type=compression_type,
      strip_trailing_newlines=strip_trailing_newlines,
      coder=coder,
      skip_header_lines=skip_header_lines)
  self._desired_bundle_size = desired_bundle_size
  self._min_bundle_size = min_bundle_size
  self._compression_type = compression_type
  self._read_all_files = ReadAllFiles(
      True,
      compression_type,
      desired_bundle_size,
      min_bundle_size,
      source_from_file)
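# A comparable usage sketch for ReadAllFromText, again assuming Apache Beam
# is installed. The file pattern and step labels are illustrative.
import apache_beam as beam
from apache_beam.io.textio import ReadAllFromText

with beam.Pipeline() as pipeline:
  lines = (
      pipeline
      | 'FilePatterns' >> beam.Create(['/tmp/logs/part-*.txt.gz'])
      # CompressionTypes.AUTO (the default) infers gzip from the .gz
      # extension; skip_header_lines drops the first line of every matched
      # file, and each output element is one decoded line.
      | 'ReadLines' >> ReadAllFromText(skip_header_lines=1))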