예제 #1
0
    def test_build_dataset(self):
        """Check the static structure of a dataset built from fake data.

        Builds a dataset through `data_lib.build_dataset` with
        `fake_data=True` and verifies, without iterating it, that:

        * the label and every feature report the shape `[None]`;
        * the feature keys are exactly `data_lib.feature_name(i)` for `i`
          in `1..data_lib.NUM_TOTAL_FEATURES`;
        * the label and the features listed in
          `data_lib.INT_FEATURE_INDICES` are `tf.float32`, and the features
          listed in `data_lib.CAT_FEATURE_INDICES` are `tf.string`.
        """
        config = data_lib.DataConfig(split='train', fake_data=True)
        dataset = data_lib.build_dataset(config,
                                         batch_size=8,
                                         is_training=False,
                                         fake_training=False)

        # Check output_shapes.
        # Go through the compat accessor (as the type check below does)
        # instead of the `dataset.output_shapes` attribute, which TF2
        # dataset objects do not provide.
        features_shapes, label_shape = tf.compat.v1.data.get_output_shapes(
            dataset)
        self.assertEqual([None], label_shape.as_list())
        expected_keys = [
            data_lib.feature_name(i)
            for i in range(1, data_lib.NUM_TOTAL_FEATURES + 1)
        ]
        self.assertSameElements(expected_keys, list(features_shapes))
        for key, shape in six.iteritems(features_shapes):
            self.assertEqual([None], shape.as_list(),
                             'Unexpected shape at key=' + key)

        # Check output_types.
        features_types, label_type = tf.compat.v1.data.get_output_types(
            dataset)
        self.assertEqual(tf.float32, label_type)
        for idx in data_lib.INT_FEATURE_INDICES:
            self.assertEqual(tf.float32,
                             features_types[data_lib.feature_name(idx)])
        for idx in data_lib.CAT_FEATURE_INDICES:
            self.assertEqual(tf.string,
                             features_types[data_lib.feature_name(idx)])
예제 #2
0
def _make_input_layers():
    """Create one Keras `Input` per feature, keyed by feature name.

    Features are numbered from 1 to `data_lib.NUM_TOTAL_FEATURES`. Those
    with an index up to and including `data_lib.NUM_INT_FEATURES` get a
    `tf.int32` input; all remaining ones get a `tf.string` input. Every
    input is declared with the shape `[]`.

    Returns:
        A dict mapping `data_lib.feature_name(idx)` to the matching
        `keras.layers.Input`, which carries the same name.
    """
    layers = {}
    last_index = data_lib.NUM_TOTAL_FEATURES
    for feature_idx in range(1, last_index + 1):
        if feature_idx > data_lib.NUM_INT_FEATURES:
            kind = tf.string
        else:
            kind = tf.int32
        feature = data_lib.feature_name(feature_idx)
        layers[feature] = keras.layers.Input([], dtype=kind, name=feature)
    return layers
예제 #3
0
def make_input_layers():
    """Build the named Keras input layers for every feature.

    Indices run from 1 to `data_lib.NUM_TOTAL_FEATURES`. An index no
    greater than `data_lib.NUM_INT_FEATURES` yields a `tf.int32` input,
    any other index a `tf.string` input. Each input uses the shape `[]`.

    Returns:
        A dict from feature name (`data_lib.feature_name(idx)`) to the
        `keras.layers.Input` created under that name.
    """

    def _named_input(index):
        # Produce the (key, layer) pair for a single feature index.
        is_int = index <= data_lib.NUM_INT_FEATURES
        kind = tf.int32 if is_int else tf.string
        label = data_lib.feature_name(index)
        return label, keras.layers.Input([], dtype=kind, name=label)

    indices = range(1, data_lib.NUM_TOTAL_FEATURES + 1)
    return dict(_named_input(index) for index in indices)
예제 #4
0
def make_feature_columns(opts):
    """Create the feature columns for the integer and categorical features.

    Each feature in `data_lib.CAT_FEATURE_INDICES` is hashed into
    `opts.num_hash_buckets[k]` buckets, where `k` is the feature index
    minus `data_lib.NUM_INT_FEATURES` minus 1. The hashed column is wrapped
    in an embedding column of size `opts.num_embed_dims[k]` when that value
    is truthy, and in an indicator column otherwise. Each feature in
    `data_lib.INT_FEATURE_INDICES` becomes a plain numeric column.

    Args:
        opts: Options object providing the indexable attributes
            `num_hash_buckets` and `num_embed_dims`.

    Returns:
        A `(int_columns, cat_columns)` tuple of lists of feature columns.
    """
    fc = tf.feature_column

    categorical = []
    for feature_idx in data_lib.CAT_FEATURE_INDICES:
        feature = data_lib.feature_name(feature_idx)
        # Position of this feature within the per-categorical option lists.
        slot = feature_idx - data_lib.NUM_INT_FEATURES - 1
        bucket_count = opts.num_hash_buckets[slot]
        embed_dims = opts.num_embed_dims[slot]

        hashed = fc.categorical_column_with_hash_bucket(feature, bucket_count)
        if embed_dims:
            categorical.append(fc.embedding_column(hashed, embed_dims))
        else:
            categorical.append(fc.indicator_column(hashed))

    numeric = [
        fc.numeric_column(data_lib.feature_name(feature_idx))
        for feature_idx in data_lib.INT_FEATURE_INDICES
    ]
    return numeric, categorical