def testSetRequestOnly(self):
    input_record = schema.Scalar(np.int64)
    schema.attach_metadata_to_scalars(
        input_record,
        schema.Metadata(
            categorical_limit=100000000,
            expected_value=99,
            feature_specs=schema.FeatureSpec(
                feature_ids=[1, 100, 1001]
            )
        )
    )
    set_request_only(input_record)
    self.assertEqual(input_record.metadata.categorical_limit, 100000000)
    self.assertEqual(input_record.metadata.expected_value, 99)
    self.assertEqual(
        input_record.metadata.feature_specs.feature_ids,
        [1, 100, 1001]
    )
def __init__(self, model, input_record, input_specs,
             name='sparse_to_dense', **kwargs):
    """
    `input_specs` follows the format of FeatureSpec from schema. To be more
    precise, it's a namedtuple that should have:
    'feature_type', 'feature_names', 'feature_ids'
    """
    super(SparseToDense, self).__init__(model, name, input_record, **kwargs)

    self.input_specs = input_specs

    outputs = []
    for field, feature_specs in self.input_specs:
        assert len(feature_specs.feature_names) == \
            len(feature_specs.feature_ids)
        if feature_specs.feature_type == 'FLOAT':
            outputs.append((
                field,
                schema.Scalar(
                    (np.float32, (len(feature_specs.feature_ids), )),
                    model.net.NextScopedBlob(name + '_' + field + '_output')
                )
            ))
        elif feature_specs.feature_type == 'ID_LIST':
            outputs.append((
                field,
                schema.Struct(
                    ('ranges', schema.Scalar(
                        (np.int32, (len(feature_specs.feature_ids), 2)),
                        model.net.NextScopedBlob(
                            name + '_' + field + '_ranges')
                    )),
                    ('values', input_record[field].values.items),
                )
            ))
        elif feature_specs.feature_type == 'ID_SCORE_LIST':
            outputs.append((
                field,
                schema.Struct(
                    ('ranges', schema.Scalar(
                        (np.int32, (len(feature_specs.feature_ids), 2)),
                        model.net.NextScopedBlob(
                            name + '_' + field + '_ranges')
                    )),
                    ('ids', input_record[field].values.keys),
                    ('scores', input_record[field].values.values),
                )
            ))
        else:
            raise TypeError("Unsupported input type: {0}".format(
                feature_specs.feature_type))

    # TODO(amalevich): This schema is producing ranges. And thus if there is
    # something using it, it should support ranges as well. It might be
    # confusing if we don't add better support for ranges / have it as a
    # first layer.
    self.output_schema = schema.Struct(*outputs)

    # TODO(amalevich): Consider moving this data to schema instead. Structs
    # don't support attaching metadata to them, and cloning will break
    # things badly, but this is the most elegant way to pass this info
    # around. Should we change it, or would it be too much work and not
    # worth it?
    for field, feature_specs in input_specs:
        schema.attach_metadata_to_scalars(
            self.output_schema[field],
            schema.Metadata(feature_specs=feature_specs))

    self.zero = model.global_constants['ZERO']
    self.zero_range = model.global_constants['ZERO_RANGE']
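# --- Illustrative sketch (not part of the original source): constructing
# the `input_specs` list the constructor above expects. Field names and
# feature ids here are hypothetical; only the FeatureSpec namedtuple
# fields come from the docstring above. `model` and `input_record` are
# assumed to come from the usual layer-model setup.
from caffe2.python import schema

input_specs = [
    ('float_features', schema.FeatureSpec(
        feature_type='FLOAT',
        feature_names=['f1', 'f2'],
        feature_ids=[1, 2],
    )),
    ('id_list_features', schema.FeatureSpec(
        feature_type='ID_LIST',
        feature_names=['g1'],
        feature_ids=[10],
    )),
]
# sparse_to_dense = SparseToDense(model, input_record, input_specs)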
def __init__(self, model, input_record, input_specs,
             name='feature_sparse_to_dense', **kwargs):
    """
    `input_specs` follows the format of FeatureSpec from schema. To be more
    precise, it's a namedtuple that should have:
    'feature_type', 'feature_names', 'feature_ids'
    """
    super(FeatureSparseToDense, self).__init__(
        model, name, input_record, **kwargs)

    self.input_specs = input_specs

    outputs = []
    for field, feature_specs in self.input_specs:
        assert len(feature_specs.feature_names) == \
            len(feature_specs.feature_ids)
        if feature_specs.feature_type == 'FLOAT':
            outputs.append((
                field,
                schema.Scalar(
                    (np.float32, (len(feature_specs.feature_ids), )),
                    self.get_next_blob_reference(field + '_output')
                )
            ))
        elif feature_specs.feature_type == 'ID_LIST':
            outputs.append((
                field,
                schema.Struct(
                    ('ranges', schema.Scalar(
                        (np.int32, (len(feature_specs.feature_ids), 2)),
                        self.get_next_blob_reference(field + '_ranges')
                    )),
                    ('values', schema.Scalar(
                        np.int64,
                        self.get_next_blob_reference(field + '_values')
                    )),
                )
            ))
        elif feature_specs.feature_type == 'ID_SCORE_LIST':
            outputs.append((
                field,
                schema.Struct(
                    ('ranges', schema.Scalar(
                        (np.int32, (len(feature_specs.feature_ids), 2)),
                        self.get_next_blob_reference(field + '_ranges')
                    )),
                    ('ids', schema.Scalar(
                        np.int64,
                        self.get_next_blob_reference(field + '_ids')
                    )),
                    ('scores', schema.Scalar(
                        np.float32,
                        self.get_next_blob_reference(field + '_scores')
                    )),
                )
            ))
        else:
            raise TypeError("Unsupported input type: {0}".format(
                feature_specs.feature_type))

    # TODO(amalevich): This schema is producing ranges. And thus if there is
    # something using it, it should support ranges as well. It might be
    # confusing if we don't add better support for ranges / have it as a
    # first layer.
    self.output_schema = schema.Struct(*outputs)

    # TODO(amalevich): Consider moving this data to schema instead. Structs
    # don't support attaching metadata to them, and cloning will break
    # things badly, but this is the most elegant way to pass this info
    # around. Should we change it, or would it be too much work and not
    # worth it?
    for field, feature_specs in input_specs:
        schema.attach_metadata_to_scalars(
            self.output_schema[field],
            schema.Metadata(feature_specs=feature_specs))

    self.zero = model.global_constants['ZERO']
    self.zero_range = model.global_constants['ZERO_RANGE']
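# --- Illustrative sketch (not part of the original source): reading the
# output record this layer declares. `layer` and the field name
# 'id_list_features' are hypothetical; per the constructor above, an
# ID_LIST field exposes an int32 (num_features, 2) 'ranges' scalar and a
# flat int64 'values' scalar.
id_list_out = layer.output_schema['id_list_features']
ranges_blob = id_list_out.ranges()   # blob holding (num_features, 2) ranges
values_blob = id_list_out.values()   # blob holding the flattened id values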
def __init__(self, model, input_record, inner_shape, reducer,
             weight_init=None, weight_optim=None,
             name='sparse_lookup', **kwargs):
    super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

    if isinstance(inner_shape, int):
        inner_shape = [inner_shape]
    assert isinstance(inner_shape, (list, tuple)), \
        "Unexpected type for inner_shape, expected list or tuple, " \
        "got {0}".format(type(inner_shape))

    # TODO Add some asserts about input type
    assert reducer in self._supported_reducers, \
        "Unsupported reducer: {}".format(reducer)
    self.reducer = reducer

    assert input_record.items.metadata is not None, \
        "Features without metadata are not supported"
    input_dim = input_record.items.metadata.categorical_limit
    assert input_dim is not None, "Unbounded features are not supported"

    self.output_schema = schema.Scalar(
        (np.float32, inner_shape),
        model.net.NextScopedBlob(name + '_output'),
    )

    if self.request_only:
        schema.attach_metadata_to_scalars(
            self.output_schema,
            schema.Metadata(
                categorical_limit=None,
                expected_value=None,
                feature_specs=schema.FeatureSpec(
                    feature_is_request_only=True)))

    scale = math.sqrt(1.0 / input_dim)
    self.shape = [input_dim] + inner_shape
    self.weight_init = weight_init if weight_init else (
        'UniformFill', {'min': -scale, 'max': scale})

    self.w = model.net.NextScopedBlob(name + "_w")
    self.params.append(
        LayerParameter(
            parameter=self.w,
            initializer=core.CreateOperator(
                self.weight_init[0],
                [],
                self.w,
                shape=self.shape,
                **self.weight_init[1]
            ),
            optimizer=weight_optim))

    if reducer == 'PositionWeighted':
        self.pos_w = model.net.NextScopedBlob(name + "_pos_w")
        self.params.append(
            LayerParameter(
                parameter=self.pos_w,
                initializer=core.CreateOperator(
                    'ConstantFill',
                    [],
                    self.pos_w,
                    shape=[input_dim, ],
                    value=1.0
                ),
                optimizer=weight_optim))
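# --- Illustrative sketch (not part of the original source): wiring a
# SparseLookup over a categorical id-list record. `model` and
# `input_record` are assumed from the usual layer-model setup; the
# record's `items` must carry metadata with a categorical_limit, as the
# asserts above require.
embedding = SparseLookup(
    model,
    input_record,
    inner_shape=64,               # normalized to [64] by the constructor
    reducer='PositionWeighted',   # also creates the pos_w parameter above
)
# embedding.output_schema is a (np.float32, [64]) Scalar.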
def __init__(self, model, input_record, input_specs, name="feature_sparse_to_dense", default_dense_value=None, **kwargs): """ `input_specs` follows the format of FeatureSpec from schema. To be more precise it's a namedtuple that should have: 'feature_type', 'feature_names', 'feature_ids' Default_dense_value can only be 0.0 or float("NaN"). Any input that isn't None will be NaN. """ super(FeatureSparseToDense, self).__init__(model, name, input_record, **kwargs) if default_dense_value is None: default_dense_value = 0.0 default_dense_value = float(default_dense_value) assert (np.isnan(default_dense_value) or default_dense_value == 0.0), "default_dense_value can only be 0.0 or NaN" self.input_specs = input_specs self.default_float_value = (model.global_constants["NAN"] if np.isnan(default_dense_value) else model.global_constants["ZERO"]) self.zero_range = model.global_constants["ZERO_RANGE"] outputs = [] for field, feature_specs in self.input_specs: assert len(feature_specs.feature_names) == len( feature_specs.feature_ids) if feature_specs.feature_type == "FLOAT": outputs.append(( field, schema.Scalar( (np.float32, (len(feature_specs.feature_ids), )), self.get_next_blob_reference(field + "_output"), ), )) elif feature_specs.feature_type == "ID_LIST": outputs.append(( field, schema.Struct( ( "ranges", schema.Scalar( (np.int32, (len(feature_specs.feature_ids), 2)), self.get_next_blob_reference(field + "_ranges"), ), ), ( "values", schema.Scalar( np.int64, self.get_next_blob_reference(field + "_values"), ), ), ), )) elif feature_specs.feature_type == "ID_SCORE_LIST": outputs.append(( field, schema.Struct( ( "ranges", schema.Scalar( (np.int32, (len(feature_specs.feature_ids), 2)), self.get_next_blob_reference(field + "_ranges"), ), ), ( "ids", schema.Scalar( np.int64, self.get_next_blob_reference(field + "_ids"), ), ), ( "scores", schema.Scalar( np.float32, self.get_next_blob_reference(field + "_scores"), ), ), ), )) elif feature_specs.feature_type == "EMBEDDING": # We don't know dimensions of embeddings in input data. # Even though they should match dimensions from feature config, # we keep ranges blob to check input data later. outputs.append(( field, schema.Struct( ( "ranges", schema.Scalar( (np.int32, (len(feature_specs.feature_ids), 2)), self.get_next_blob_reference(field + "_ranges"), ), ), ( "values", schema.Scalar( np.float32, self.get_next_blob_reference(field + "_values"), ), ), ), )) elif feature_specs.feature_type == "GENERIC_FEATURE": # We don't know dimensions of embeddings in input data. # Even though they should match dimensions from feature config, # we keep ranges blob to check input data later. # Currently this schema with ranges and values is only for # generic type enum 1. If new types are implemented, we need to # modify the ParseGeneric operator, and this part accordingly outputs.append(( field, schema.Struct( ( "ranges", schema.Scalar( (np.int32, (len(feature_specs.feature_ids), 2)), self.get_next_blob_reference(field + "_ranges"), ), ), ( "values", schema.Scalar( np.float32, self.get_next_blob_reference(field + "_values"), ), ), ), )) else: raise TypeError("Unsupported input type: {0}".format( feature_specs.feature_type)) # TODO(amalevich): This schema is producing ranges. And thus if there is # something using it it should support ranges as well. 
It might be # confusing, if we don't add better support for ranges/have it as a # first layer self.output_schema = schema.Struct(*outputs) # TODO(amalevich): Consider moving this data to schema, instead # Structs doesn't support attaching metadata to them and clonning # will break things badly, but this is the most elegant way to pass # this info around. Should we change it or it'll be too much work and # not worse it? for field, feature_specs in input_specs: schema.attach_metadata_to_scalars( self.output_schema[field], schema.Metadata(feature_specs=feature_specs))
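# --- Illustrative sketch (not part of the original source): opting into
# NaN padding so downstream code can tell "feature absent" apart from a
# true zero. `model`, `input_record`, and `input_specs` are assumed from
# the surrounding setup; per the assert above, any value other than None,
# 0.0, or NaN is rejected.
layer = FeatureSparseToDense(
    model,
    input_record,
    input_specs,
    default_dense_value=float("nan"),
)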
def __init__(self, model, input_record, inner_shape, reducer,
             weight_init=None, weight_optim=None,
             name='sparse_lookup', **kwargs):
    super(SparseLookup, self).__init__(model, name, input_record, **kwargs)

    if isinstance(inner_shape, int):
        inner_shape = [inner_shape]
    assert isinstance(inner_shape, (list, tuple)), \
        "Unexpected type for inner_shape, expected list or tuple, " \
        "got {0}".format(type(inner_shape))

    # TODO Add some asserts about input type
    assert reducer in self._supported_reducers, \
        "Unsupported reducer: {}".format(reducer)
    self.reducer = reducer

    assert input_record.items.metadata is not None, \
        "Features without metadata are not supported"
    input_dim = input_record.items.metadata.categorical_limit
    assert input_dim is not None, "Unbounded features are not supported"

    self.output_schema = schema.Scalar(
        (np.float32, inner_shape),
        core.ScopedBlobReference(model.net.NextName(self.name + '_output')))

    if self.request_only:
        schema.attach_metadata_to_scalars(
            self.output_schema,
            schema.Metadata(
                categorical_limit=None,
                expected_value=None,
                feature_specs=schema.FeatureSpec(
                    feature_is_request_only=True
                )
            )
        )

    scale = math.sqrt(1.0 / input_dim)
    self.shape = [input_dim] + inner_shape
    self.weight_init = weight_init if weight_init else (
        'UniformFill', {'min': -scale, 'max': scale})

    self.w = core.ScopedBlobReference(model.net.NextName(self.name + "_w"))
    self.params.append(
        LayerParameter(
            parameter=self.w,
            initializer=core.CreateOperator(
                self.weight_init[0],
                [],
                self.w,
                shape=self.shape,
                **self.weight_init[1]
            ),
            optimizer=weight_optim
        ))

    if reducer == 'PositionWeighted':
        self.pos_w = core.ScopedBlobReference(
            model.net.NextName(self.name + "_pos_w"))
        self.params.append(
            LayerParameter(
                parameter=self.pos_w,
                initializer=core.CreateOperator(
                    'ConstantFill',
                    [],
                    self.pos_w,
                    shape=[input_dim, ],
                    value=1.0
                ),
                optimizer=weight_optim
            ))
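# --- Illustrative sketch (not part of the original source): connecting
# set_request_only (exercised by the test at the top) with the
# `request_only` branch above. Marking the input record as request-only
# is expected to make the layer tag its output schema with
# feature_is_request_only metadata; 'Sum' is assumed to be among
# _supported_reducers.
set_request_only(input_record)
lookup = SparseLookup(model, input_record, inner_shape=[32], reducer='Sum')
assert lookup.output_schema.metadata.feature_specs.feature_is_request_only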