Example #1
    def _test_build_transforming_training_input_fn(self, shape):
        basedir = tempfile.mkdtemp()

        raw_metadata = dataset_metadata.DatasetMetadata(
            schema=_make_raw_schema(shape))

        # the transformed schema should be vectorized already.
        transformed_metadata = dataset_metadata.DatasetMetadata(
            schema=_make_transformed_schema([1]))
        data_file = os.path.join(basedir, 'data')
        examples = [
            _create_serialized_example(d) for d in [{
                'raw_a': 15,
                'raw_b': 6,
                'raw_label': 77
            }, {
                'raw_a': 12,
                'raw_b': 17,
                'raw_label': 44
            }]
        ]
        _write_tfrecord(data_file, examples)

        transform_savedmodel_dir = os.path.join(basedir,
                                                'transform-savedmodel')
        _write_transform_savedmodel(transform_savedmodel_dir)

        training_input_fn = (
            input_fn_maker.build_transforming_training_input_fn(
                raw_metadata=raw_metadata,
                transformed_metadata=transformed_metadata,
                transform_savedmodel_dir=transform_savedmodel_dir,
                raw_data_file_pattern=[data_file],
                training_batch_size=128,
                raw_label_keys=['raw_label'],
                transformed_label_keys=['transformed_label'],
                raw_feature_keys=['raw_a', 'raw_b'],
                transformed_feature_keys=['transformed_a', 'transformed_b'],
                randomize_input=False))

        with tf.Graph().as_default():
            features, labels = training_input_fn()

            with tf.Session().as_default() as session:
                session.run(tf.initialize_all_variables())
                tf.train.start_queue_runners()
                transformed_a, transformed_b, transformed_label = session.run([
                    features['transformed_a'], features['transformed_b'],
                    labels
                ])

        self.assertEqual(21, transformed_a[0][0])
        self.assertEqual(9, transformed_b[0][0])
        self.assertEqual(77000, transformed_label[0][0])
        self.assertEqual(29, transformed_a[1][0])
        self.assertEqual(-5, transformed_b[1][0])
        self.assertEqual(44000, transformed_label[1][0])
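
Judging from the assertions above (15 + 6 = 21, 15 - 6 = 9, 77 * 1000 = 77000, and likewise for the second record), the SavedModel written by the _write_transform_savedmodel helper appears to add the two raw features, subtract them, and scale the label. A minimal sketch of an equivalent tf.Transform preprocessing_fn, assuming exactly that arithmetic (the helper's real implementation is not shown in this excerpt):

    def preprocessing_fn(inputs):
        # 'inputs' maps raw feature names to tensors parsed with the raw schema.
        return {
            'transformed_a': inputs['raw_a'] + inputs['raw_b'],   # 15 + 6 -> 21
            'transformed_b': inputs['raw_a'] - inputs['raw_b'],   # 15 - 6 -> 9
            'transformed_label': inputs['raw_label'] * 1000,      # 77 -> 77000
        }

The input_fn returned by build_transforming_training_input_fn applies that transform graph on the fly, which is why the test writes raw tf.Examples to disk but reads back transformed features and labels.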
Example #2
  def _test_build_transforming_training_input_fn(self, shape):
    basedir = tempfile.mkdtemp()

    raw_metadata = dataset_metadata.DatasetMetadata(
        schema=_make_raw_schema(shape, should_add_unused_feature=True))

    # the transformed schema should be vectorized already.
    transformed_metadata = dataset_metadata.DatasetMetadata(
        schema=_make_transformed_schema([1]))
    data_file = os.path.join(basedir, 'data')
    examples = [_create_serialized_example(d)
                for d in [
                    {'raw_a': 15,
                     'raw_b': 6,
                     'raw_label': 77},
                    {'raw_a': 12,
                     'raw_b': 17,
                     'raw_label': 44}]]
    _write_tfrecord(data_file, examples)

    transform_savedmodel_dir = os.path.join(basedir, 'transform-savedmodel')
    _write_transform_savedmodel(
        transform_savedmodel_dir, should_add_unused_feature=True)

    training_input_fn = (
        input_fn_maker.build_transforming_training_input_fn(
            raw_metadata=raw_metadata,
            transformed_metadata=transformed_metadata,
            transform_savedmodel_dir=transform_savedmodel_dir,
            raw_data_file_pattern=[data_file],
            training_batch_size=128,
            transformed_label_keys=['transformed_label'],
            randomize_input=False,
            convert_scalars_to_vectors=True))

    with tf.Graph().as_default():
      features, labels = training_input_fn()

      with tf.Session().as_default() as session:
        session.run(tf.initialize_all_variables())
        tf.train.start_queue_runners()
        transformed_a, transformed_b, transformed_label = session.run(
            [features['transformed_a'],
             features['transformed_b'],
             labels])

    batch_shape = (128, 1)
    sparse_batch_shape = batch_shape

    if not shape:
      # transformed_b is sparse so _convert_scalars_to_vectors did not fix it
      sparse_batch_shape = sparse_batch_shape[:1]
      transformed_b_dict = dict(zip([tuple(x + [0])
                                     for x in transformed_b.indices.tolist()],
                                    transformed_b.values.tolist()))
    else:
      transformed_b_dict = dict(zip([tuple(x)
                                     for x in transformed_b.indices.tolist()],
                                    transformed_b.values.tolist()))

    self.assertEqual(batch_shape, tuple(transformed_a.shape))
    self.assertEqual(sparse_batch_shape, tuple(transformed_b.dense_shape))
    self.assertEqual(batch_shape, tuple(transformed_label.shape))

    self.assertEqual(21, transformed_a[0][0])
    self.assertEqual(9, transformed_b_dict[(0, 0)])
    self.assertEqual(77000, transformed_label[0][0])
    self.assertEqual(29, transformed_a[1][0])
    self.assertEqual(-5, transformed_b_dict[(1, 0)])
    self.assertEqual(44000, transformed_label[1][0])
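
The dict built from transformed_b.indices and transformed_b.values lets both branches share the same (row, column) lookups: with a scalar raw shape the feature comes back as a 1-D SparseTensor, so each index holds a single element and a trailing 0 is appended, while with a vector shape the indices are already (row, column) pairs. A small standalone sketch of that bookkeeping, using hand-written index lists in place of the real SparseTensorValue (the values 9 and -5 are the ones asserted above):

    # Indices as they would appear from transformed_b.indices.tolist().
    indices_1d = [[0], [1]]        # scalar raw feature: 1-D SparseTensor indices
    indices_2d = [[0, 0], [1, 0]]  # vector raw feature: (row, column) indices
    values = [9, -5]

    keys_1d = [tuple(x + [0]) for x in indices_1d]  # pad each index to (row, 0)
    keys_2d = [tuple(x) for x in indices_2d]

    # Both branches end up with the same {(row, 0): value} mapping.
    assert dict(zip(keys_1d, values)) == dict(zip(keys_2d, values)) == {(0, 0): 9, (1, 0): -5}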