def testGetBatchElementsKwargs(self):
  """Checks the kwargs produced for auto-tuned and fixed batch sizes."""
  # When no batch size is requested, only an upper bound is imposed and
  # beam auto-tunes the actual batch size.
  auto_tuned_kwargs = batch_util.GetBatchElementsKwargs(batch_size=None)
  self.assertDictEqual(auto_tuned_kwargs, {'max_batch_size': 1000})
  # A fixed batch size pins both the lower and the upper bound.
  fixed_kwargs = batch_util.GetBatchElementsKwargs(batch_size=5000)
  self.assertDictEqual(
      fixed_kwargs, {
          'max_batch_size': 5000,
          'min_batch_size': 5000
      })
def ptransform_fn(raw_records_pcoll: beam.pvalue.PCollection):
  """Batches raw records and decodes each batch into Arrow RecordBatches."""
  # `batch_size` is a free variable captured from the enclosing scope.
  batched_records = raw_records_pcoll | "Batch" >> beam.BatchElements(
      **batch_util.GetBatchElementsKwargs(batch_size))
  decode_do_fn = _DecodeBatchExamplesDoFn(self._GetSchemaForDecoding(),
                                          self.raw_record_column_name)
  return batched_records | "Decode" >> beam.ParDo(decode_do_fn)
def _PTransformFn(raw_record_pcoll: beam.pvalue.PCollection):
  """Batches raw records and maps each batch to an Arrow RecordBatch."""
  # `batch_size` is a free variable captured from the enclosing scope.
  batched_records = raw_record_pcoll | "Batch" >> beam.BatchElements(
      **batch_util.GetBatchElementsKwargs(batch_size))
  return batched_records | "ToRecordBatch" >> beam.Map(
      _BatchedRecordsToArrow, self.raw_record_column_name,
      self._can_produce_large_types)
def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
  """Batches raw records and decodes them with the saved decoder."""
  # Constructing the DoFn up front; `batch_size` comes from the
  # enclosing scope.
  decode_do_fn = _RecordsToRecordBatch(self._saved_decoder_path,
                                       self.raw_record_column_name,
                                       self._can_produce_large_types)
  return (raw_records_pcoll
          | "BatchElements" >> beam.BatchElements(
              **batch_util.GetBatchElementsKwargs(batch_size))
          | "Decode" >> beam.ParDo(decode_do_fn))
def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
  """Batches raw records and decodes each batch via the schema-based DoFn."""
  # `batch_size` is a free variable captured from the enclosing scope.
  decode_do_fn = _DecodeBatchExamplesDoFn(self._schema,
                                          self.raw_record_column_name,
                                          self._can_produce_large_types)
  batched_records = raw_records_pcoll | "Batch" >> beam.BatchElements(
      **batch_util.GetBatchElementsKwargs(batch_size))
  return batched_records | "Decode" >> beam.ParDo(decode_do_fn)
def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
  """Batches raw records and decodes them with the saved decoder DoFn."""
  # A shared handle lets worker processes reuse one decoder instance when
  # the singleton-decoder mode is enabled.
  decoder_shared_handle = (
      shared.Shared() if self._use_singleton_decoder else None)
  decode_do_fn = _RecordsToRecordBatch(self._saved_decoder_path,
                                       self.telemetry_descriptors,
                                       decoder_shared_handle,
                                       self.raw_record_column_name,
                                       self._record_index_column_name)
  return (raw_records_pcoll
          | "BatchElements" >> beam.BatchElements(
              **batch_util.GetBatchElementsKwargs(batch_size))
          | "Decode" >> beam.ParDo(decode_do_fn))
def BatchSerializedExamplesToArrowRecordBatches(
    examples: beam.pvalue.PCollection,
    desired_batch_size: Optional[int] = constants
    .DEFAULT_DESIRED_INPUT_BATCH_SIZE
) -> beam.pvalue.PCollection:
  """Batches serialized examples into Arrow record batches.

  Args:
    examples: A PCollection of serialized tf.Examples.
    desired_batch_size: Batch size. The output Arrow record batches will have
      as many rows as the `desired_batch_size`.

  Returns:
    A PCollection of Arrow record batches.
  """
  batching_kwargs = batch_util.GetBatchElementsKwargs(desired_batch_size)
  batched_examples = (
      examples
      | "BatchSerializedExamples" >> beam.BatchElements(**batching_kwargs))
  return batched_examples | "BatchDecodeExamples" >> beam.ParDo(
      _BatchDecodeExamplesDoFn())
def BatchExamplesToArrowRecordBatches(
    examples: beam.pvalue.PCollection,
    desired_batch_size: Optional[int] = constants
    .DEFAULT_DESIRED_INPUT_BATCH_SIZE
) -> beam.pvalue.PCollection:
  """Batches example dicts into Arrow record batches.

  Args:
    examples: A PCollection of example dicts.
    desired_batch_size: Batch size. The output Arrow record batches will have
      as many rows as the `desired_batch_size`.

  Returns:
    A PCollection of Arrow record batches.
  """
  return (
      examples
      | "BatchBeamExamples" >> beam.BatchElements(
          **batch_util.GetBatchElementsKwargs(desired_batch_size))
      # Pass the decoder callable directly; the previous lambda wrapper added
      # nothing and required a pylint unnecessary-lambda suppression.
      | "DecodeExamplesToRecordBatch" >> beam.Map(
          decoded_examples_to_arrow.DecodedExamplesToRecordBatch))
def CSVToRecordBatch(lines: beam.pvalue.PCollection,
                     column_names: List[Text],
                     desired_batch_size: Optional[int],
                     delimiter: Text = ",",
                     skip_blank_lines: bool = True,
                     schema: Optional[schema_pb2.Schema] = None,
                     multivalent_columns: Optional[List[Union[Text,
                                                              bytes]]] = None,
                     secondary_delimiter: Optional[Union[Text, bytes]] = None,
                     raw_record_column_name: Optional[Text] = None,
                     produce_large_types: bool = False):
  """Decodes CSV records into Arrow RecordBatches.

  Args:
    lines: The pcollection of raw records (csv lines).
    column_names: List of feature names. Order must match the order in the
      CSV file.
    desired_batch_size: Batch size. The output Arrow RecordBatches will have
      as many rows as the `desired_batch_size`. If None, the batch size is
      auto tuned by beam.
    delimiter: A one-character string used to separate fields.
    skip_blank_lines: A boolean to indicate whether to skip over blank lines
      rather than interpreting them as missing values.
    schema: An optional schema of the input data. If this is provided, it
      must contain all columns.
    multivalent_columns: Columns that can contain multiple values. If
      secondary_delimiter is provided, this must also be provided.
    secondary_delimiter: Delimiter used for parsing multivalent columns. If
      multivalent_columns is provided, this must also be provided.
    raw_record_column_name: Optional name for a column containing the raw csv
      lines. If this is None, then this column will not be produced. This
      will always be the last column in the record batch.
    produce_large_types: If True, will output record batches with columns
      that are large_list types.

  Returns:
    RecordBatches of the CSV lines.

  Raises:
    ValueError:
      * If the columns do not match the specified csv headers.
      * If the schema has invalid feature types.
      * If the schema does not contain all columns.
      * If raw_record_column_name exists in column_names
  """
  # Guard: the raw-record column must not collide with a data column.
  if (raw_record_column_name is not None and
      raw_record_column_name in column_names):
    raise ValueError(
        "raw_record_column_name: {} is already an existing column name. "
        "Please choose a different name.".format(raw_record_column_name))

  parsed_lines = (
      lines | "ParseCSVLines" >> beam.ParDo(ParseCSVLine(delimiter)))

  if schema is None:
    # TODO(b/72746442): Consider using a DeepCopy optimization similar to TFT.
    # Do first pass to infer the feature types.
    type_inferrer = ColumnTypeInferrer(
        column_names=column_names,
        skip_blank_lines=skip_blank_lines,
        multivalent_columns=multivalent_columns,
        secondary_delimiter=secondary_delimiter)
    column_infos = beam.pvalue.AsSingleton(
        parsed_lines
        | "ExtractParsedCSVLines" >> beam.Keys()
        | "InferColumnTypes" >> beam.CombineGlobally(type_inferrer))
  else:
    column_infos = _GetColumnInfosFromSchema(schema, column_names)

  # Do second pass to generate the RecordBatches.
  rows_to_record_batch = BatchedCSVRowsToRecordBatch(
      skip_blank_lines=skip_blank_lines,
      multivalent_columns=multivalent_columns,
      secondary_delimiter=secondary_delimiter,
      raw_record_column_name=raw_record_column_name,
      produce_large_types=produce_large_types)
  return (parsed_lines
          | "BatchCSVLines" >> beam.BatchElements(
              **batch_util.GetBatchElementsKwargs(desired_batch_size))
          | "BatchedCSVRowsToArrow" >> beam.ParDo(rows_to_record_batch,
                                                  column_infos))