Example #1
    def testPreservesMetadata(self):
        s = schema.Struct(
            ('a', schema.Scalar(np.float32)),
            ('b',
             schema.Scalar(np.int32,
                           metadata=schema.Metadata(categorical_limit=5))),
            ('c',
             schema.List(
                 schema.Scalar(
                     np.int32,
                     metadata=schema.Metadata(categorical_limit=6)))))
        # attach metadata to lengths field
        s.c.lengths.set_metadata(schema.Metadata(categorical_limit=7))

        self.assertEqual(None, s.a.metadata)
        self.assertEqual(5, s.b.metadata.categorical_limit)
        self.assertEqual(6, s.c.value.metadata.categorical_limit)
        self.assertEqual(7, s.c.lengths.metadata.categorical_limit)
        sc = s.clone()
        self.assertEqual(None, sc.a.metadata)
        self.assertEqual(5, sc.b.metadata.categorical_limit)
        self.assertEqual(6, sc.c.value.metadata.categorical_limit)
        self.assertEqual(7, sc.c.lengths.metadata.categorical_limit)
        sv = schema.from_blob_list(s, [
            np.array([3.4]),
            np.array([2]),
            np.array([3]),
            np.array([1, 2, 3])
        ])
        self.assertEqual(None, sv.a.metadata)
        self.assertEqual(5, sv.b.metadata.categorical_limit)
        self.assertEqual(6, sv.c.value.metadata.categorical_limit)
        self.assertEqual(7, sv.c.lengths.metadata.categorical_limit)
Example #2
    def __init__(self,
                 model,
                 input_record,
                 seed=0,
                 modulo=None,
                 use_hashing=True,
                 name='sparse_feature_hash',
                 **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record,
                                                **kwargs)

        self.seed = seed
        self.use_hashing = use_hashing
        if schema.equal_schemas(input_record, IdList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, self.get_next_blob_reference("hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.List(
                values=hashed_indices,
                lengths_blob=input_record.lengths,
            )
        elif schema.equal_schemas(input_record, IdScoreList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, self.get_next_blob_reference("hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.Map(
                keys=hashed_indices,
                values=input_record.values,
                lengths_blob=input_record.lengths,
            )
        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"

        assert self.modulo >= 1, 'Unexpected modulo: {}'.format(self.modulo)

        # Operators in this layer do not have a CUDA implementation yet.
        # In addition, since the sparse feature keys that we are hashing
        # typically originate on the CPU, it makes sense to keep this layer
        # on the CPU.
        self.tags.update([Tags.CPU_ONLY])
Example #3
    def __init__(self,
                 model,
                 input_record,
                 seed,
                 name='sparse_feature_hash',
                 **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record,
                                                **kwargs)

        self.seed = seed
        self.lengths_blob = schema.Scalar(
            np.int32,
            model.net.NextScopedBlob(name + "_lengths"),
        )

        if schema.equal_schemas(input_record, IdList):
            self.modulo = self.extract_hash_size(input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, model.net.NextScopedBlob(name + "_hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.List(
                values=hashed_indices,
                lengths_blob=self.lengths_blob,
            )
        elif schema.equal_schemas(input_record, IdScoreList):
            self.values_blob = schema.Scalar(
                np.float32,
                model.net.NextScopedBlob(name + "_values"),
            )
            self.modulo = self.extract_hash_size(input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, model.net.NextScopedBlob(name + "_hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.Map(
                keys=hashed_indices,
                values=self.values_blob,
                lengths_blob=self.lengths_blob,
            )
        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"
Example #4
def set_request_only(field):
    for f in field.all_scalars():
        categorical_limit, expected_value = None, None
        if not f.metadata:
            feature_specs = schema.FeatureSpec(
                feature_is_request_only=True,
            )
        elif not f.metadata.feature_specs:
            categorical_limit = f.metadata.categorical_limit
            expected_value = f.metadata.expected_value
            feature_specs = schema.FeatureSpec(
                feature_is_request_only=True,
            )
        else:
            categorical_limit = f.metadata.categorical_limit
            expected_value = f.metadata.expected_value
            feature_specs = schema.FeatureSpec(
                feature_type=f.metadata.feature_specs.feature_type,
                feature_names=f.metadata.feature_specs.feature_names,
                feature_ids=f.metadata.feature_specs.feature_ids,
                feature_is_request_only=True,
            )

        # make sure not to set categorical_limit for a non-integer field
        if not np.issubdtype(f.field_type(), np.integer):
            assert categorical_limit is None, \
                "categorical_limit shouldn't be set for no-integer field"

        f.set_metadata(
            schema.Metadata(
                categorical_limit=categorical_limit,
                expected_value=expected_value,
                feature_specs=feature_specs,
            )
        )
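A minimal usage sketch of set_request_only (not part of the original snippet; the field is hypothetical, and it assumes numpy and caffe2.python.schema are importable): a scalar without metadata takes the first branch above and gets a fresh FeatureSpec marked request-only, with categorical_limit left unset.

import numpy as np
from caffe2.python import schema

# Hypothetical float scalar with no metadata attached yet.
record = schema.Scalar(np.float32)
set_request_only(record)
# A request-only FeatureSpec is attached; categorical_limit stays None.
assert record.metadata.feature_specs.feature_is_request_only
assert record.metadata.categorical_limit is None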
Example #5
    def __init__(self,
                 model,
                 input_record,
                 seed=0,
                 modulo=None,
                 use_hashing=True,
                 name='sparse_feature_hash',
                 **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record,
                                                **kwargs)

        self.seed = seed
        self.use_hashing = use_hashing
        if schema.equal_schemas(input_record, IdList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
                expected_value=input_record.items.metadata.expected_value)
            with core.NameScope(name):
                self.output_schema = schema.NewRecord(model.net, IdList)
            self.output_schema.items.set_metadata(metadata)

        elif schema.equal_schemas(input_record, IdScoreList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
                expected_value=input_record.keys.metadata.expected_value)
            with core.NameScope(name):
                self.output_schema = schema.NewRecord(model.net, IdScoreList)
            self.output_schema.keys.set_metadata(metadata)

        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"

        assert self.modulo >= 1, 'Unexpected modulo: {}'.format(self.modulo)
        if input_record.lengths.metadata:
            self.output_schema.lengths.set_metadata(
                input_record.lengths.metadata)

        # Operators in this layer do not have a CUDA implementation yet.
        # In addition, since the sparse feature keys that we are hashing
        # typically originate on the CPU, it makes sense to keep this layer
        # on the CPU.
        self.tags.update([Tags.CPU_ONLY])
Example #6
    def __init__(self,
                 model,
                 input_record,
                 seed=0,
                 modulo=None,
                 use_hashing=True,
                 name='sparse_feature_hash',
                 **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record,
                                                **kwargs)

        self.seed = seed
        self.use_hashing = use_hashing
        if schema.equal_schemas(input_record, IdList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, self.get_next_blob_reference("hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.List(
                values=hashed_indices,
                lengths_blob=input_record.lengths,
            )
        elif schema.equal_schemas(input_record, IdScoreList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, self.get_next_blob_reference("hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.Map(
                keys=hashed_indices,
                values=input_record.values,
                lengths_blob=input_record.lengths,
            )
        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"

        assert self.modulo >= 1, 'Unexpected modulo: {}'.format(self.modulo)
Example #7
    def __init__(self,
                 model,
                 input_record,
                 axis=1,
                 add_axis=0,
                 name='concat',
                 **kwargs):
        super(Concat, self).__init__(model, name, input_record, **kwargs)
        self.axis = axis
        self.add_axis = add_axis
        assert not (axis == 0 and add_axis == 1), \
            "Adding a new axis at axis 0 is not allowed"
        assert isinstance(input_record, schema.Struct),\
            "Incorrect input type. Expected Struct, but received: {0}".\
            format(input_record)

        shapes = []
        for field_name, field_type in viewitems(input_record.fields):
            assert isinstance(field_type, schema.Scalar),\
                "Incorrect input type for {}. Expected Scalar, but got: {}".\
                format(field_name, field_type)
            # Assume that first dimension is batch, so actual axis in shape is
            # axis - 1
            shape = list(field_type.field_type().shape)
            if add_axis:
                shape.insert(axis - 1, 1)
            assert len(shape) >= axis,\
                "Concat expects the input tensor to have at least {} "\
                "dimensions (excluding batch)".format(axis)
            shapes.append(shape)
        logger.info('Concat Layer input shapes: ' + str(shapes))

        if axis == 0:
            self.output_schema = schema.from_blob_list(
                input_record[0], [self.get_next_blob_reference('output')])
            return

        concat_dim = 0
        for shape in shapes:
            concat_dim += shape[axis - 1]
            shape[axis - 1] = 0
            assert shape == shapes[0],\
                "Shapes {0} and {1} are not compatible for Concat".\
                format(shape, shapes[0])
        output_dims = shapes[0]
        output_dims[axis - 1] = concat_dim

        logger.info('Concat Layer output_dims: ' + str(output_dims))
        self.output_schema = schema.Scalar(
            (np.float32, output_dims), self.get_next_blob_reference('output'))

        record_to_concat = input_record.fields.values()
        concated_feature_to_index = get_concatenated_feature_to_index(
            record_to_concat)
        if concated_feature_to_index:
            metadata = schema.Metadata(feature_specs=schema.FeatureSpec(
                feature_to_index=concated_feature_to_index))
            self.output_schema.set_metadata(metadata)
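A standalone sketch of the shape arithmetic in this layer (input shapes are hypothetical; the batch dimension is excluded from each shape, so axis 1 maps to index 0):

# Two scalar fields of shape (10,) and (20,), concatenated along axis=1.
axis = 1
shapes = [[10], [20]]
concat_dim = 0
for shape in shapes:
    concat_dim += shape[axis - 1]
    shape[axis - 1] = 0  # zero out so the compatibility check ignores it
    assert shape == shapes[0]
output_dims = shapes[0]
output_dims[axis - 1] = concat_dim
print(output_dims)  # [30]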
Example #8
    def testSetRequestOnly(self):
        input_record = schema.Scalar(np.int64)
        schema.attach_metadata_to_scalars(
            input_record,
            schema.Metadata(
                categorical_limit=100000000,
                expected_value=99,
                feature_specs=schema.FeatureSpec(feature_ids=[1, 100, 1001])))

        set_request_only(input_record)
        self.assertEqual(input_record.metadata.categorical_limit, 100000000)
        self.assertEqual(input_record.metadata.expected_value, 99)
        self.assertEqual(input_record.metadata.feature_specs.feature_ids,
                         [1, 100, 1001])
Example #9
    def __init__(self, model, input_record, name='merged'):
        super(MergeIdLists, self).__init__(model, name, input_record)
        assert all(schema.equal_schemas(x, IdList) for x in input_record), \
            "Inputs to MergeIdLists should all be IdLists."

        assert all(record.items.metadata is not None
                   for record in self.input_record), \
            "Features without metadata are not supported"

        merge_dim = max(
            get_categorical_limit(record) for record in self.input_record)
        assert merge_dim is not None, "Unbounded features are not supported"

        self.output_schema = schema.NewRecord(
            model.net,
            schema.List(
                schema.Scalar(
                    np.int64,
                    blob=model.net.NextBlob(name),
                    metadata=schema.Metadata(categorical_limit=merge_dim))))
Example #10
    def testMergeIdListsLayer(self, num_inputs, batch_size):
        inputs = []
        for _ in range(num_inputs):
            lengths = np.random.randint(5, size=batch_size).astype(np.int32)
            size = lengths.sum()
            values = np.random.randint(1, 10, size=size).astype(np.int64)
            inputs.append(lengths)
            inputs.append(values)
        input_schema = schema.Tuple(*[
            schema.List(
                schema.Scalar(dtype=np.int64,
                              metadata=schema.Metadata(categorical_limit=20)))
            for _ in range(num_inputs)
        ])

        input_record = schema.NewRecord(self.model.net, input_schema)
        schema.FeedRecord(input_record, inputs)
        output_schema = self.model.MergeIdLists(input_record)
        assert schema.equal_schemas(output_schema,
                                    IdList,
                                    check_field_names=False)
Example #11
    def testSparseLookup(self):
        record = schema.NewRecord(self.model.net, schema.Struct(
            ('sparse', schema.Struct(
                ('sparse_feature_0', schema.List(
                    schema.Scalar(np.int64,
                                  metadata=schema.Metadata(categorical_limit=1000)))),
            )),
        ))
        embedding_dim = 64
        embedding_after_pooling = self.model.SparseLookup(
            record.sparse.sparse_feature_0, [embedding_dim], 'Sum')
        self.model.output_schema = embedding_after_pooling
        self.assertEqual(
            schema.Scalar((np.float32, (embedding_dim, ))),
            embedding_after_pooling
        )

        train_init_net, train_net = self.get_training_nets()

        init_ops = self.assertNetContainOps(
            train_init_net,
            [
                OpSpec("UniformFill", None, None),
                OpSpec("ConstantFill", None, None),
            ]
        )
        sparse_lookup_op_spec = OpSpec(
            'SparseLengthsSum',
            [
                init_ops[0].output[0],
                record.sparse.sparse_feature_0.items(),
                record.sparse.sparse_feature_0.lengths(),
            ],
            [embedding_after_pooling()]
        )
        self.assertNetContainOps(train_net, [sparse_lookup_op_spec])

        predict_net = self.get_predict_net()
        self.assertNetContainOps(predict_net, [sparse_lookup_op_spec])
Example #12
    def __init__(self,
                 model,
                 input_record,
                 input_specs,
                 name='sparse_to_dense',
                 **kwargs):
        """
        `input_specs` follows the format of FeatureSpec from schema. To be
        more precise, it's a namedtuple that should have:
            'feature_type', 'feature_names', 'feature_ids'
        """
        super(SparseToDense, self).__init__(model, name, input_record,
                                            **kwargs)

        self.input_specs = input_specs

        outputs = []
        for field, feature_specs in self.input_specs:
            assert len(feature_specs.feature_names) ==\
                len(feature_specs.feature_ids)
            if feature_specs.feature_type == 'FLOAT':
                outputs.append(
                    (field,
                     schema.Scalar(
                         (np.float32, (len(feature_specs.feature_ids), )),
                         model.net.NextScopedBlob(name + '_' + field +
                                                  '_output'))))
            elif feature_specs.feature_type == 'ID_LIST':
                outputs.append(
                    (field,
                     schema.Struct(
                         (
                             'ranges',
                             schema.Scalar(
                                 (np.int32,
                                  (len(feature_specs.feature_ids), 2)),
                                 model.net.NextScopedBlob(name + '_' + field +
                                                          '_ranges')),
                         ),
                         ('values', input_record[field].values.items),
                     )))
            elif feature_specs.feature_type == 'ID_SCORE_LIST':
                outputs.append(
                    (field,
                     schema.Struct(
                         (
                             'ranges',
                             schema.Scalar(
                                 (np.int32,
                                  (len(feature_specs.feature_ids), 2)),
                                 model.net.NextScopedBlob(name + '_' + field +
                                                          '_ranges')),
                         ),
                         ('ids', input_record[field].values.keys),
                         ('scores', input_record[field].values.values),
                     )))
            else:
                raise TypeError("Unsupported input type: {0}".format(
                    feature_specs.feature_type))

        # TODO(amalevich): This schema is producing ranges, so anything
        # consuming it should support ranges as well. It might be confusing
        # if we don't add better support for ranges / have it as a first
        # layer.
        self.output_schema = schema.Struct(*outputs)

        # TODO(amalevich): Consider moving this data to the schema instead.
        # Structs don't support attaching metadata to them, and cloning will
        # break things badly, but this is the most elegant way to pass this
        # info around. Should we change it, or would it be too much work and
        # not worth it?
        for field, feature_specs in input_specs:
            schema.attach_metadata_to_scalars(
                self.output_schema[field],
                schema.Metadata(feature_specs=feature_specs))
        self.zero = model.global_constants['ZERO']
        self.zero_range = model.global_constants['ZERO_RANGE']
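A sketch of what the input_specs argument might look like (field names and ids are hypothetical; only the FeatureSpec fields named in the docstring are used):

from caffe2.python import schema

input_specs = [
    ('float_features', schema.FeatureSpec(
        feature_type='FLOAT',
        feature_names=['f1', 'f2'],
        feature_ids=[11, 12],
    )),
    ('id_list_features', schema.FeatureSpec(
        feature_type='ID_LIST',
        feature_names=['g1'],
        feature_ids=[21],
    )),
]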
Example #13
    def __init__(self,
                 model,
                 input_record,
                 inner_shape,
                 reducer,
                 weight_init=None,
                 weight_optim=None,
                 name='sparse_lookup',
                 **kwargs):
        super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

        if isinstance(inner_shape, int):
            inner_shape = [inner_shape]
        assert isinstance(inner_shape, list) or isinstance(inner_shape, tuple),\
            "Unexpected type for inner_shape, expected list or tuple, got {0}".\
            format(type(inner_shape))

        # TODO Add some asserts about input type
        assert reducer in self._supported_reducers, "Unsupported reducer: {}".\
            format(reducer)
        self.reducer = reducer

        assert input_record.items.metadata is not None,\
            "Features without metadata are not supported"
        input_dim = input_record.items.metadata.categorical_limit
        assert input_dim is not None, "Unbounded features are not supported"

        self.output_schema = schema.Scalar(
            (np.float32, inner_shape),
            model.net.NextScopedBlob(name + '_output'),
        )

        if self.request_only:
            schema.attach_metadata_to_scalars(
                self.output_schema,
                schema.Metadata(categorical_limit=None,
                                expected_value=None,
                                feature_specs=schema.FeatureSpec(
                                    feature_is_request_only=True)))
        scale = math.sqrt(1.0 / input_dim)
        self.shape = [input_dim] + inner_shape
        self.weight_init = weight_init if weight_init else ('UniformFill', {
            'min': -scale,
            'max': scale
        })

        self.w = model.net.NextScopedBlob(name + "_w")
        self.params.append(
            LayerParameter(parameter=self.w,
                           initializer=core.CreateOperator(
                               self.weight_init[0], [],
                               self.w,
                               shape=self.shape,
                               **self.weight_init[1]),
                           optimizer=weight_optim))

        if reducer == 'PositionWeighted':
            self.pos_w = model.net.NextScopedBlob(name + "_pos_w")
            self.params.append(
                LayerParameter(parameter=self.pos_w,
                               initializer=core.CreateOperator('ConstantFill',
                                                               [],
                                                               self.pos_w,
                                                               shape=[
                                                                   input_dim,
                                                               ],
                                                               value=1.0),
                               optimizer=weight_optim))
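A quick check of the default weight-initialization range used above (the input_dim value is hypothetical):

import math

input_dim = 10000  # hypothetical categorical_limit of the input feature
scale = math.sqrt(1.0 / input_dim)
# The default UniformFill draws from [-scale, scale].
print(-scale, scale)  # -0.01 0.01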
Example #14
    def __init__(self,
                 model,
                 input_record,
                 seed=0,
                 modulo=None,
                 use_hashing=True,
                 use_divide_mod=False,
                 divisor=None,
                 name='sparse_feature_hash',
                 **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record,
                                                **kwargs)

        assert use_hashing + use_divide_mod < 2, \
            "use_hashing and use_divide_mod cannot both be set at the same time."

        if use_divide_mod:
            assert divisor >= 1, 'Unexpected divisor: {}'.format(divisor)

            self.divisor = self.create_param(
                param_name='divisor',
                shape=[1],
                initializer=('GivenTensorInt64Fill', {
                    'values': np.array([divisor])
                }),
                optimizer=model.NoOptim)

        self.seed = seed
        self.use_hashing = use_hashing
        self.use_divide_mod = use_divide_mod

        if schema.equal_schemas(input_record, IdList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
                expected_value=input_record.items.metadata.expected_value)
            with core.NameScope(name):
                self.output_schema = schema.NewRecord(model.net, IdList)
            self.output_schema.items.set_metadata(metadata)

        elif schema.equal_schemas(input_record, IdScoreList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
                expected_value=input_record.keys.metadata.expected_value)
            with core.NameScope(name):
                self.output_schema = schema.NewRecord(model.net, IdScoreList)
            self.output_schema.keys.set_metadata(metadata)

        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"

        assert self.modulo >= 1, 'Unexpected modulo: {}'.format(self.modulo)
        if input_record.lengths.metadata:
            self.output_schema.lengths.set_metadata(
                input_record.lengths.metadata)

        # Operators in this layer do not have a CUDA implementation yet.
        # In addition, since the sparse feature keys that we are hashing
        # typically originate on the CPU, it makes sense to keep this layer
        # on the CPU.
        self.tags.update([Tags.CPU_ONLY])
Example #15
    def __init__(self,
                 model,
                 input_record,
                 input_specs,
                 name="feature_sparse_to_dense",
                 default_dense_value=None,
                 **kwargs):
        """
        `input_specs` follows the format of FeatureSpec from schema. To be
        more precise, it's a namedtuple that should have:
            'feature_type', 'feature_names', 'feature_ids'
        `default_dense_value` can only be 0.0 or float("NaN"); if None, it
        defaults to 0.0.
        """
        super(FeatureSparseToDense, self).__init__(model, name, input_record,
                                                   **kwargs)
        if default_dense_value is None:
            default_dense_value = 0.0
        default_dense_value = float(default_dense_value)
        assert (np.isnan(default_dense_value) or default_dense_value
                == 0.0), "default_dense_value can only be 0.0 or NaN"

        self.input_specs = input_specs
        self.default_float_value = (model.global_constants["NAN"]
                                    if np.isnan(default_dense_value) else
                                    model.global_constants["ZERO"])
        self.zero_range = model.global_constants["ZERO_RANGE"]

        outputs = []
        for field, feature_specs in self.input_specs:
            assert len(feature_specs.feature_names) == len(
                feature_specs.feature_ids)
            if feature_specs.feature_type == "FLOAT":
                outputs.append((
                    field,
                    schema.Scalar(
                        (np.float32, (len(feature_specs.feature_ids), )),
                        self.get_next_blob_reference(field + "_output"),
                    ),
                ))
            elif feature_specs.feature_type == "ID_LIST":
                outputs.append((
                    field,
                    schema.Struct(
                        (
                            "ranges",
                            schema.Scalar(
                                (np.int32,
                                 (len(feature_specs.feature_ids), 2)),
                                self.get_next_blob_reference(field +
                                                             "_ranges"),
                            ),
                        ),
                        (
                            "values",
                            schema.Scalar(
                                np.int64,
                                self.get_next_blob_reference(field +
                                                             "_values"),
                            ),
                        ),
                    ),
                ))
            elif feature_specs.feature_type == "ID_SCORE_LIST":
                outputs.append((
                    field,
                    schema.Struct(
                        (
                            "ranges",
                            schema.Scalar(
                                (np.int32,
                                 (len(feature_specs.feature_ids), 2)),
                                self.get_next_blob_reference(field +
                                                             "_ranges"),
                            ),
                        ),
                        (
                            "ids",
                            schema.Scalar(
                                np.int64,
                                self.get_next_blob_reference(field + "_ids"),
                            ),
                        ),
                        (
                            "scores",
                            schema.Scalar(
                                np.float32,
                                self.get_next_blob_reference(field +
                                                             "_scores"),
                            ),
                        ),
                    ),
                ))
            elif feature_specs.feature_type == "EMBEDDING":
                # We don't know the dimensions of the embeddings in the input
                # data. Even though they should match the dimensions from the
                # feature config, we keep the ranges blob to check the input
                # data later.
                outputs.append((
                    field,
                    schema.Struct(
                        (
                            "ranges",
                            schema.Scalar(
                                (np.int32,
                                 (len(feature_specs.feature_ids), 2)),
                                self.get_next_blob_reference(field +
                                                             "_ranges"),
                            ),
                        ),
                        (
                            "values",
                            schema.Scalar(
                                np.float32,
                                self.get_next_blob_reference(field +
                                                             "_values"),
                            ),
                        ),
                    ),
                ))
            elif feature_specs.feature_type == "GENERIC_FEATURE":
                # We don't know the dimensions of the embeddings in the input
                # data. Even though they should match the dimensions from the
                # feature config, we keep the ranges blob to check the input
                # data later.
                # Currently this schema with ranges and values is only for the
                # generic type enum 1. If new types are implemented, we need to
                # modify the ParseGeneric operator and this part accordingly.
                outputs.append((
                    field,
                    schema.Struct(
                        (
                            "ranges",
                            schema.Scalar(
                                (np.int32,
                                 (len(feature_specs.feature_ids), 2)),
                                self.get_next_blob_reference(field +
                                                             "_ranges"),
                            ),
                        ),
                        (
                            "values",
                            schema.Scalar(
                                np.float32,
                                self.get_next_blob_reference(field +
                                                             "_values"),
                            ),
                        ),
                    ),
                ))
            else:
                raise TypeError("Unsupported input type: {0}".format(
                    feature_specs.feature_type))

        # TODO(amalevich): This schema is producing ranges, so anything
        # consuming it should support ranges as well. It might be confusing
        # if we don't add better support for ranges / have it as a first
        # layer.
        self.output_schema = schema.Struct(*outputs)

        # TODO(amalevich): Consider moving this data to the schema instead.
        # Structs don't support attaching metadata to them, and cloning will
        # break things badly, but this is the most elegant way to pass this
        # info around. Should we change it, or would it be too much work and
        # not worth it?
        for field, feature_specs in input_specs:
            schema.attach_metadata_to_scalars(
                self.output_schema[field],
                schema.Metadata(feature_specs=feature_specs))