Пример #1
0
    def testCsvToExampleWithEmptyColumn(self):
        """Verify `_CsvToExample` output when one CSV column is empty.

        Runs the transform over the 'csv_empty/*' split pattern and expects
        three examples in which column A is int64, column B has no value
        kind set, column C is bytes and column D is float.
        """
        with beam.Pipeline() as pipeline:
            csv_to_example = executor._CsvToExample(
                exec_properties={
                    standard_component_specs.INPUT_BASE_KEY:
                        self._input_data_dir,
                },
                split_pattern='csv_empty/*')
            examples = pipeline | 'ToTFExample' >> csv_to_example

            def check_results(got):
                # Plain Python assertions are used here to avoid a Beam
                # serialization error.
                count = len(got)
                assert count == 3, 'Unexpected example count {}.'.format(
                    count)
                for item in got:
                    columns = item.features.feature
                    assert columns['A'].HasField('int64_list'), (
                        'Column A should be int64 type.')
                    # An empty column carries no value in the `kind` oneof.
                    assert not columns['B'].WhichOneof('kind'), (
                        'Column B should be empty.')
                    assert columns['C'].HasField('bytes_list'), (
                        'Column C should be byte type.')
                    assert columns['D'].HasField('float_list'), (
                        'Column D should be float type.')

            util.assert_that(examples, check_results)
Пример #2
0
  def testCsvToExample(self):
    """Check the examples produced by `_CsvToExample` for the CSV input.

    Applies `executor._CsvToExample` to `self._input_dict` and verifies the
    number of examples produced and the feature count of the first example.
    """
    with beam.Pipeline() as pipeline:
      examples = (
          pipeline
          | 'ToTFExample' >> executor._CsvToExample(self._input_dict, {}))

      def check_result(got):
        # Use plain Python assertions rather than `self.assert*`: referencing
        # `self` makes this callback close over the test case, which Beam
        # then has to serialize and which can fail with a pickling error.
        assert 15000 == len(got), (
            'Unexpected example count: {}.'.format(len(got)))
        assert 18 == len(got[0].features.feature), (
            'Unexpected feature count: {}.'.format(
                len(got[0].features.feature)))

      util.assert_that(examples, check_result)
Пример #3
0
  def testCsvToExample(self):
    """Verify example count and feature count produced by `_CsvToExample`.

    Feeds `self._input_dict` through the transform and expects 15000
    examples, the first of which has 18 features.
    """
    with beam.Pipeline() as pipeline:
      csv_to_example = executor._CsvToExample(self._input_dict, {})
      examples = pipeline | 'ToTFExample' >> csv_to_example

      def check_result(results):
        # Plain Python assertions keep tf.test.TestCase out of this callback,
        # avoiding a Beam serialization (pickling) error.
        example_count = len(results)
        assert example_count == 15000, 'Unexpected example count'
        feature_count = len(results[0].features.feature)
        assert feature_count == 18, 'Example not match'

      util.assert_that(examples, check_result)
Пример #4
0
  def testCsvToExample(self):
    """Verify example count and feature count for the 'csv/*' split pattern.

    Runs `_CsvToExample` with the input base set to `self._input_data_dir`
    and expects 15000 examples, the first of which has 18 features.
    """
    with beam.Pipeline() as pipeline:
      csv_to_example = executor._CsvToExample(
          exec_properties={utils.INPUT_BASE_KEY: self._input_data_dir},
          split_pattern='csv/*')
      examples = pipeline | 'ToTFExample' >> csv_to_example

      def check_results(got):
        # Plain Python assertions keep tf.test.TestCase out of this callback,
        # avoiding a Beam serialization (pickling) error.
        example_count = len(got)
        assert example_count == 15000, 'Unexpected example count.'
        feature_count = len(got[0].features.feature)
        assert feature_count == 18, 'Example not match.'

      util.assert_that(examples, check_results)
Пример #5
0
    def testCsvToExampleMultiLineString(self):
        """Verify `_CsvToExample` keeps a multi-line quoted string intact.

        Runs the transform over the 'csv_multi_line_string/*' split pattern,
        expects three examples, and checks that column B of the second
        example is a bytes feature holding the full multi-line value.
        """
        with beam.Pipeline() as pipeline:
            csv_to_example = executor._CsvToExample(
                exec_properties={
                    standard_component_specs.INPUT_BASE_KEY:
                        self._input_data_dir,
                },
                split_pattern='csv_multi_line_string/*')
            examples = pipeline | 'ToTFExample' >> csv_to_example

            def check_results(got):
                # Plain Python assertions are used here to avoid a Beam
                # serialization error.
                count = len(got)
                assert count == 3, 'Unexpected example count: {}.'.format(
                    count)
                # Only the second example is inspected.
                column_b = got[1].features.feature['B']
                assert column_b.HasField('bytes_list'), (
                    'Column B should be bytes type. ')
                value = column_b.bytes_list.value
                expected = [b'"2,\n"3",\n4\n5"']
                assert value == expected, 'Unexpected value: {}.'.format(
                    value)

            util.assert_that(examples, check_results)