Example #1
 def testMapInStructIndexing(self):
     a = schema.Map(
         schema.Scalar(dtype=np.int32),
         schema.Scalar(dtype=np.float32),
     )
     s = schema.Struct(('field1', schema.Scalar(dtype=np.int32)),
                       ('field2', a))
     self.assertEqual(s['field2:values:keys'], a.keys)
     self.assertEqual(s['field2:values:values'], a.values)
     with self.assertRaises(KeyError):
         s['fields2:keys:non_existent']
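The colon-joined paths above work because schema.Map flattens into a List of (keys, values) pairs, so a Map's leaf fields live under values:keys and values:values. A minimal sketch of that flattened layout; the expected names are inferred from the paths this test uses, not checked against a specific caffe2 version:

import numpy as np
from caffe2.python import schema

a = schema.Map(
    schema.Scalar(dtype=np.int32),
    schema.Scalar(dtype=np.float32),
)
# Inferred from the 'field2:values:keys' path above:
# ['lengths', 'values:keys', 'values:values']
print(a.field_names())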
Example #2
    def __init__(self,
                 model,
                 input_record,
                 seed=0,
                 modulo=None,
                 use_hashing=True,
                 name='sparse_feature_hash',
                 **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record,
                                                **kwargs)

        self.seed = seed
        self.use_hashing = use_hashing
        if schema.equal_schemas(input_record, IdList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, self.get_next_blob_reference("hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.List(
                values=hashed_indices,
                lengths_blob=input_record.lengths,
            )
        elif schema.equal_schemas(input_record, IdScoreList):
            self.modulo = modulo or self.extract_hash_size(
                input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, self.get_next_blob_reference("hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.Map(
                keys=hashed_indices,
                values=input_record.values,
                lengths_blob=input_record.lengths,
            )
        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"

        assert self.modulo >= 1, 'Unexpected modulo: {}'.format(self.modulo)

        # operators in this layer do not have CUDA implementation yet.
        # In addition, since the sparse feature keys that we are hashing are
        # typically on CPU originally, it makes sense to have this layer on CPU.
        self.tags.update([Tags.CPU_ONLY])
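For context, a minimal sketch of the two record shapes this constructor dispatches on, using the IdList / IdScoreList definitions from caffe2.python.layers.layers (shown in Example #6 below):

import numpy as np
from caffe2.python import schema

IdList = schema.List(np.int64)                  # lengths + int64 ids
IdScoreList = schema.Map(np.int64, np.float32)  # int64 ids with float scores

# equal_schemas() is what selects the List branch vs. the Map branch above:
record = schema.Map(np.int64, np.float32)
assert schema.equal_schemas(record, IdScoreList)
assert not schema.equal_schemas(record, IdList)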
Example #3
    def __init__(self,
                 model,
                 input_record,
                 seed,
                 name='sparse_feature_hash',
                 **kwargs):
        super(SparseFeatureHash, self).__init__(model, name, input_record,
                                                **kwargs)

        self.seed = seed
        self.lengths_blob = schema.Scalar(
            np.int32,
            model.net.NextScopedBlob(name + "_lengths"),
        )

        if schema.equal_schemas(input_record, IdList):
            self.modulo = self.extract_hash_size(input_record.items.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.items.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, model.net.NextScopedBlob(name + "_hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.List(
                values=hashed_indices,
                lengths_blob=self.lengths_blob,
            )
        elif schema.equal_schemas(input_record, IdScoreList):
            self.values_blob = schema.Scalar(
                np.float32,
                model.net.NextScopedBlob(name + "_values"),
            )
            self.modulo = self.extract_hash_size(input_record.keys.metadata)
            metadata = schema.Metadata(
                categorical_limit=self.modulo,
                feature_specs=input_record.keys.metadata.feature_specs,
            )
            hashed_indices = schema.Scalar(
                np.int64, model.net.NextScopedBlob(name + "_hashed_idx"))
            hashed_indices.set_metadata(metadata)
            self.output_schema = schema.Map(
                keys=hashed_indices,
                values=self.values_blob,
                lengths_blob=self.lengths_blob,
            )
        else:
            assert False, "Input type must be one of (IdList, IdScoreList)"
Example #4
 def testFromColumnList(self):
     st = schema.Struct(('a', schema.Scalar()),
                        ('b', schema.List(schema.Scalar())),
                        ('c', schema.Map(schema.Scalar(), schema.Scalar())))
     columns = st.field_names()
     # test that recovery works for arbitrary order
     for _ in range(10):
         some_blobs = [core.BlobReference('blob:' + x) for x in columns]
         rec = schema.from_column_list(columns, col_blobs=some_blobs)
         self.assertTrue(rec.has_blobs())
         self.assertEqual(sorted(st.field_names()),
                          sorted(rec.field_names()))
         self.assertEqual(
             [str(blob) for blob in rec.field_blobs()],
             [str('blob:' + name) for name in rec.field_names()])
         random.shuffle(columns)
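For reference, a rough sketch of the column list the round trip above recovers; the expected names follow the same List/Map flattening seen in Example #1:

from caffe2.python import schema

st = schema.Struct(('a', schema.Scalar()),
                   ('b', schema.List(schema.Scalar())),
                   ('c', schema.Map(schema.Scalar(), schema.Scalar())))
# Expected, by the flattening rules demonstrated in Example #1:
# ['a', 'b:lengths', 'b:values',
#  'c:lengths', 'c:values:keys', 'c:values:values']
print(st.field_names())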
Example #5
    def create_net(self):
        net = core.Net("feature_extractor")
        init_net = core.Net("feature_extractor_init")
        missing_scalar = self.create_const(init_net, "MISSING_SCALAR",
                                           MISSING_VALUE)

        input_schema = schema.Struct((
            "float_features",
            schema.Map(
                keys=core.BlobReference("input/float_features.keys"),
                values=core.BlobReference("input/float_features.values"),
                lengths_blob=core.BlobReference(
                    "input/float_features.lengths"),
            ),
        ))

        input_record = net.set_input_record(input_schema)

        state = self.extract_float_features(
            net,
            "state",
            input_record.float_features,
            self.sorted_state_features,
            missing_scalar,
        )

        output_record = schema.Struct(("state", state))

        if self.sorted_action_features:
            action = self.extract_float_features(
                net,
                "action",
                input_record.float_features,
                self.sorted_action_features,
                missing_scalar,
            )
            output_record += schema.Struct(("action", action))

        net.set_output_record(output_record)

        return FeatureExtractorNet(net, init_net)
Example #6
## @package layers
# Module caffe2.python.layers.layers
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from caffe2.python import core, schema, scope
from caffe2.python.layers.tags import TagContext

from collections import namedtuple
import numpy as np

# Some types to simplify descriptions of things traveling between ops
IdList = schema.List(np.int64)
IdScoreList = schema.Map(np.int64, np.float32)


def get_categorical_limit(record):
    if schema.equal_schemas(record, IdList):
        key = 'items'
    elif schema.equal_schemas(record, IdScoreList, check_field_types=False):
        key = 'keys'
    else:
        raise NotImplementedError()
    assert record[key].metadata is not None, (
        "Blob {} doesn't have metadata".format(str(record[key]())))
    return record[key].metadata.categorical_limit


def set_request_only(field):
Example #7
def map_schema():
    return schema.Map(schema.Scalar(), schema.Scalar())
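map_schema() fixes only the shape of the record; both scalars stay untyped until bound to data. A short usage sketch (keys/values are the same accessors the indexing test in Example #1 relies on):

m = map_schema()
print(m.field_names())   # expected: ['lengths', 'values:keys', 'values:values']
print(m.keys, m.values)  # untyped Scalars until dtypes/blobs are attached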
Example #8
    def create_net(self):
        net = core.Net("feature_extractor")
        init_net = core.Net("feature_extractor_init")
        missing_scalar = self.create_const(init_net, "MISSING_SCALAR",
                                           MISSING_VALUE)

        input_schema = schema.Struct((
            "float_features",
            schema.Map(
                keys=core.BlobReference("input/float_features.keys"),
                values=core.BlobReference("input/float_features.values"),
                lengths_blob=core.BlobReference(
                    "input/float_features.lengths"),
            ),
        ))

        input_record = net.set_input_record(input_schema)

        state = self.extract_float_features(
            net,
            "state",
            input_record.float_features,
            self.sorted_state_features,
            missing_scalar,
        )

        if self.sorted_action_features:
            action = self.extract_float_features(
                net,
                "action",
                input_record.float_features,
                self.sorted_action_features,
                missing_scalar,
            )

        if self.normalize:
            C2.set_net_and_init_net(net, init_net)
            state, _ = PreprocessorNet().normalize_dense_matrix(
                state,
                self.sorted_state_features,
                self.state_normalization_parameters,
                blobname_prefix="state",
                split_expensive_feature_groups=True,
            )
            if self.sorted_action_features:
                action, _ = PreprocessorNet().normalize_dense_matrix(
                    action,
                    self.sorted_action_features,
                    self.action_normalization_parameters,
                    blobname_prefix="action",
                    split_expensive_feature_groups=True,
                )
            C2.set_net_and_init_net(None, None)

        output_record = schema.Struct(("state", state))
        if self.sorted_action_features:
            output_record += schema.Struct(("action", action))

        net.set_output_record(output_record)

        return FeatureExtractorNet(net, init_net)
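Note: this variant is the same feature extractor as Example #5, extended with an optional normalization pass. When self.normalize is set, the extracted state (and, if present, action) matrices are run through PreprocessorNet().normalize_dense_matrix() before being assembled into the output record.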