# Example 1
    def test_standard_scaler_serializer(self):
        """Serialize a single-feature StandardScaler to a bundle and verify
        the contents of the emitted model.json and node.json files."""
        scaler = StandardScaler(with_mean=True, with_std=True)
        scaler.mlinit(input_features='a', output_features='a_scaled')
        scaler.fit(self.df[['a']])
        scaler.serialize_to_bundle(self.tmp_dir, scaler.name)

        # Reference statistics computed directly from the fixture data.
        mean_expected = self.df.a.mean()
        std_expected = np.sqrt(np.var(self.df.a))

        expected = {
            "op": "standard_scaler",
            "attributes": {
                "mean": {
                    "type": {"type": "tensor", "tensor": {"base": "double"}},
                    "value": {"values": [mean_expected], "dimensions": [1]},
                },
                "std": {
                    "type": {"type": "tensor", "tensor": {"base": "double"}},
                    "value": {"values": [std_expected], "dimensions": [1]},
                },
            },
        }

        # The fitted estimator should agree with the hand-computed stats.
        self.assertAlmostEqual(mean_expected,
                               scaler.mean_.tolist()[0],
                               places=7)
        self.assertAlmostEqual(std_expected,
                               np.sqrt(scaler.var_.tolist()[0]),
                               places=7)

        # Test model.json
        model_path = "{}/{}.node/model.json".format(self.tmp_dir, scaler.name)
        with open(model_path) as fh:
            model = json.load(fh)

        self.assertEqual(scaler.op, expected['op'])
        for attr in ('mean', 'std'):
            self.assertEqual(
                expected['attributes'][attr]['value']['dimensions'][0],
                model['attributes'][attr]['value']['dimensions'][0])
        for attr in ('mean', 'std'):
            self.assertAlmostEqual(
                expected['attributes'][attr]['value']['values'][0],
                model['attributes'][attr]['value']['values'][0],
                places=7)

        # Test node.json
        node_path = "{}/{}.node/node.json".format(self.tmp_dir, scaler.name)
        with open(node_path) as fh:
            node = json.load(fh)

        self.assertEqual(scaler.name, node['name'])
        self.assertEqual(scaler.input_features,
                         node['shape']['inputs'][0]['name'])
        self.assertEqual(scaler.output_features,
                         node['shape']['outputs'][0]['name'])
# Example 2
    def test_standard_scaler_serializer(self):
        """Serialize a StandardScaler initialized from a FeatureExtractor and
        verify the flattened (double/shape) bundle JSON representation."""
        scaler = StandardScaler(with_mean=True, with_std=True)

        cols = ['a']
        extractor = FeatureExtractor(
            input_scalars=['a'],
            output_vector='extracted_a_output',
            output_vector_items=["{}_out".format(c) for c in cols])

        scaler.mlinit(prior_tf=extractor, output_features='a_scaled')
        scaler.fit(self.df[['a']])
        scaler.serialize_to_bundle(self.tmp_dir, scaler.name)

        # Reference statistics computed directly from the fixture data.
        mean_expected = self.df.a.mean()
        std_expected = np.sqrt(np.var(self.df.a))

        tensor_shape = {"dimensions": [{"size": 1, "name": ""}]}
        expected = {
            "op": "standard_scaler",
            "attributes": {
                "mean": {
                    "double": [mean_expected],
                    "shape": tensor_shape,
                    "type": "tensor",
                },
                "std": {
                    "double": [std_expected],
                    "shape": tensor_shape,
                    "type": "tensor",
                },
            },
        }

        # The fitted estimator should agree with the hand-computed stats.
        self.assertAlmostEqual(mean_expected,
                               scaler.mean_.tolist()[0],
                               places=7)
        self.assertAlmostEqual(std_expected,
                               np.sqrt(scaler.var_.tolist()[0]),
                               places=7)

        # Test model.json
        model_path = "{}/{}.node/model.json".format(self.tmp_dir, scaler.name)
        with open(model_path) as fh:
            model = json.load(fh)

        self.assertEqual(scaler.op, expected['op'])
        for attr in ('mean', 'std'):
            self.assertEqual(
                expected['attributes'][attr]['shape']['dimensions'][0]['size'],
                model['attributes'][attr]['shape']['dimensions'][0]['size'])
        for attr in ('mean', 'std'):
            self.assertAlmostEqual(
                expected['attributes'][attr]['double'][0],
                model['attributes'][attr]['double'][0],
                places=7)

        # Test node.json
        node_path = "{}/{}.node/node.json".format(self.tmp_dir, scaler.name)
        with open(node_path) as fh:
            node = json.load(fh)

        self.assertEqual(scaler.name, node['name'])
        self.assertEqual(scaler.input_features,
                         node['shape']['inputs'][0]['name'])
        self.assertEqual(scaler.output_features,
                         node['shape']['outputs'][0]['name'])
# Example 3
    def test_standard_scaler_multi_deserializer(self):
        """Round-trip a two-column StandardScaler through a bundle and check
        that the deserialized transformer matches the original."""
        # Serialize a standard scaler to a bundle
        scaler = StandardScaler(with_mean=True, with_std=True)
        scaler.mlinit(input_features=['a', 'b'],
                      output_features=['a_scaled', 'b_scaled'])
        scaler.fit(self.df[['a', 'b']])
        scaler.serialize_to_bundle(self.tmp_dir, scaler.name)

        # Now deserialize it back
        node_name = "{}.node".format(scaler.name)
        restored = StandardScaler()
        restored = restored.deserialize_from_bundle(self.tmp_dir, node_name)

        # Transform some sample data
        out_orig = scaler.transform(self.df[['a', 'b']])
        out_restored = restored.transform(self.df[['a', 'b']])

        for col in (0, 1):
            self.assertEqual(out_orig[0][col], out_restored[0][col])
        self.assertEqual(scaler.name, restored.name)
        self.assertEqual(scaler.op, restored.op)
        # Fitted statistics must survive the round trip per column.
        for col in (0, 1):
            self.assertEqual(scaler.mean_[col], restored.mean_[col])
        for col in (0, 1):
            self.assertEqual(scaler.scale_[col], restored.scale_[col])
# Example 4
    def test_standard_scaler_multi_deserializer(self):
        """Round-trip a FeatureExtractor-backed two-column StandardScaler
        through a bundle and check the restored transformer matches."""
        cols = ['a', 'b']
        extractor = FeatureExtractor(
            input_scalars=['a', 'b'],
            output_vector='extracted_multi_outputs',
            output_vector_items=["{}_out".format(c) for c in cols])

        # Serialize a standard scaler to a bundle
        scaler = StandardScaler(with_mean=True, with_std=True)
        scaler.mlinit(prior_tf=extractor,
                      output_features=['a_scaled', 'b_scaled'])
        scaler.fit(self.df[['a', 'b']])
        scaler.serialize_to_bundle(self.tmp_dir, scaler.name)

        # Now deserialize it back
        node_name = "{}.node".format(scaler.name)
        restored = StandardScaler()
        restored = restored.deserialize_from_bundle(self.tmp_dir, node_name)

        # Transform some sample data
        out_orig = scaler.transform(self.df[['a', 'b']])
        out_restored = restored.transform(self.df[['a', 'b']])

        for col in (0, 1):
            self.assertEqual(out_orig[0][col], out_restored[0][col])
        self.assertEqual(scaler.name, restored.name)
        self.assertEqual(scaler.op, restored.op)
        # Fitted statistics must survive the round trip per column.
        for col in (0, 1):
            self.assertEqual(scaler.mean_[col], restored.mean_[col])
        for col in (0, 1):
            self.assertEqual(scaler.scale_[col], restored.scale_[col])