def test_standard_scaler_serializer(self):
    """Serialize a single-feature StandardScaler bundle and verify its
    model.json and node.json contents against the fitted estimator.

    NOTE(review): a later method in this file reuses this exact name and
    therefore shadows this definition at class-creation time — confirm
    which version is intended to run.
    """
    standard_scaler = StandardScaler(with_mean=True, with_std=True)
    standard_scaler.mlinit(input_features='a', output_features='a_scaled')
    standard_scaler.fit(self.df[['a']])
    standard_scaler.serialize_to_bundle(self.tmp_dir, standard_scaler.name)

    expected_mean = self.df.a.mean()
    # Population std (ddof=0) — matches the variance StandardScaler stores
    # in var_.
    expected_std = np.sqrt(np.var(self.df.a))
    expected_model = {
        "op": "standard_scaler",
        "attributes": {
            "mean": {
                "type": {
                    "type": "tensor",
                    "tensor": {
                        "base": "double"
                    }
                },
                "value": {
                    "values": [expected_mean],
                    "dimensions": [1]
                }
            },
            "std": {
                "type": {
                    "type": "tensor",
                    "tensor": {
                        "base": "double"
                    }
                },
                "value": {
                    "values": [expected_std],
                    "dimensions": [1]
                }
            }
        }
    }

    # Sanity-check the fitted estimator itself before inspecting the bundle.
    self.assertAlmostEqual(expected_mean,
                           standard_scaler.mean_.tolist()[0],
                           places=7)
    self.assertAlmostEqual(expected_std,
                           np.sqrt(standard_scaler.var_.tolist()[0]),
                           places=7)

    # Test model.json
    with open("{}/{}.node/model.json".format(
            self.tmp_dir, standard_scaler.name)) as json_data:
        model = json.load(json_data)

    self.assertEqual(standard_scaler.op, expected_model['op'])
    # Fix: also verify the op actually written to disk; the original test
    # parsed model.json but never asserted model['op'].
    self.assertEqual(expected_model['op'], model['op'])
    self.assertEqual(
        expected_model['attributes']['mean']['value']['dimensions'][0],
        model['attributes']['mean']['value']['dimensions'][0])
    self.assertEqual(
        expected_model['attributes']['std']['value']['dimensions'][0],
        model['attributes']['std']['value']['dimensions'][0])
    self.assertAlmostEqual(
        expected_model['attributes']['mean']['value']['values'][0],
        model['attributes']['mean']['value']['values'][0],
        places=7)
    self.assertAlmostEqual(
        expected_model['attributes']['std']['value']['values'][0],
        model['attributes']['std']['value']['values'][0],
        places=7)

    # Test node.json
    with open("{}/{}.node/node.json".format(
            self.tmp_dir, standard_scaler.name)) as json_data:
        node = json.load(json_data)

    self.assertEqual(standard_scaler.name, node['name'])
    self.assertEqual(standard_scaler.input_features,
                     node['shape']['inputs'][0]['name'])
    self.assertEqual(standard_scaler.output_features,
                     node['shape']['outputs'][0]['name'])
def test_standard_scaler_serializer(self):
    """Serialize a StandardScaler fed by a FeatureExtractor and verify its
    model.json and node.json contents (flat ``double``/``shape`` attribute
    layout).

    NOTE(review): this method name duplicates an earlier one in this file
    and shadows it at class-creation time — confirm only this version is
    meant to run.
    """
    standard_scaler = StandardScaler(with_mean=True, with_std=True)
    extract_features = ['a']
    feature_extractor = FeatureExtractor(
        input_scalars=['a'],
        output_vector='extracted_a_output',
        output_vector_items=["{}_out".format(x) for x in extract_features])
    standard_scaler.mlinit(prior_tf=feature_extractor,
                           output_features='a_scaled')
    standard_scaler.fit(self.df[['a']])
    standard_scaler.serialize_to_bundle(self.tmp_dir, standard_scaler.name)

    expected_mean = self.df.a.mean()
    # Population std (ddof=0) — matches the variance StandardScaler stores
    # in var_.
    expected_std = np.sqrt(np.var(self.df.a))
    expected_model = {
        "op": "standard_scaler",
        "attributes": {
            "mean": {
                "double": [expected_mean],
                "shape": {
                    "dimensions": [{
                        "size": 1,
                        "name": ""
                    }]
                },
                "type": "tensor"
            },
            "std": {
                "double": [expected_std],
                "shape": {
                    "dimensions": [{
                        "size": 1,
                        "name": ""
                    }]
                },
                "type": "tensor"
            }
        }
    }

    # Sanity-check the fitted estimator itself before inspecting the bundle.
    self.assertAlmostEqual(expected_mean,
                           standard_scaler.mean_.tolist()[0],
                           places=7)
    self.assertAlmostEqual(expected_std,
                           np.sqrt(standard_scaler.var_.tolist()[0]),
                           places=7)

    # Test model.json
    with open("{}/{}.node/model.json".format(
            self.tmp_dir, standard_scaler.name)) as json_data:
        model = json.load(json_data)

    self.assertEqual(standard_scaler.op, expected_model['op'])
    # Fix: also verify the op actually written to disk; the original test
    # parsed model.json but never asserted model['op'].
    self.assertEqual(expected_model['op'], model['op'])
    self.assertEqual(
        expected_model['attributes']['mean']['shape']['dimensions'][0]
        ['size'],
        model['attributes']['mean']['shape']['dimensions'][0]['size'])
    self.assertEqual(
        expected_model['attributes']['std']['shape']['dimensions'][0]
        ['size'],
        model['attributes']['std']['shape']['dimensions'][0]['size'])
    self.assertAlmostEqual(
        expected_model['attributes']['mean']['double'][0],
        model['attributes']['mean']['double'][0],
        places=7)
    self.assertAlmostEqual(
        expected_model['attributes']['std']['double'][0],
        model['attributes']['std']['double'][0],
        places=7)

    # Test node.json
    with open("{}/{}.node/node.json".format(
            self.tmp_dir, standard_scaler.name)) as json_data:
        node = json.load(json_data)

    self.assertEqual(standard_scaler.name, node['name'])
    self.assertEqual(standard_scaler.input_features,
                     node['shape']['inputs'][0]['name'])
    self.assertEqual(standard_scaler.output_features,
                     node['shape']['outputs'][0]['name'])
def test_standard_scaler_multi_deserializer(self):
    """Round-trip a two-feature StandardScaler through a bundle and check
    that the restored transformer matches the original exactly."""
    # Serialize a standard scaler to a bundle
    scaler = StandardScaler(with_mean=True, with_std=True)
    scaler.mlinit(input_features=['a', 'b'],
                  output_features=['a_scaled', 'b_scaled'])
    scaler.fit(self.df[['a', 'b']])
    scaler.serialize_to_bundle(self.tmp_dir, scaler.name)

    # Now deserialize it back
    node_name = "{}.node".format(scaler.name)
    restored = StandardScaler()
    restored = restored.deserialize_from_bundle(self.tmp_dir, node_name)

    # Transform some sample data
    original_out = scaler.transform(self.df[['a', 'b']])
    restored_out = restored.transform(self.df[['a', 'b']])

    # Outputs on the first row must agree column-for-column.
    for col in (0, 1):
        self.assertEqual(original_out[0][col], restored_out[0][col])

    # Metadata and fitted statistics survive the round trip unchanged.
    self.assertEqual(scaler.name, restored.name)
    self.assertEqual(scaler.op, restored.op)
    for i in (0, 1):
        self.assertEqual(scaler.mean_[i], restored.mean_[i])
        self.assertEqual(scaler.scale_[i], restored.scale_[i])
def test_standard_scaler_multi_deserializer(self):
    """Round-trip a FeatureExtractor-backed two-feature StandardScaler
    through a bundle and check the restored transformer matches exactly."""
    columns = ['a', 'b']
    extractor = FeatureExtractor(
        input_scalars=['a', 'b'],
        output_vector='extracted_multi_outputs',
        output_vector_items=["{}_out".format(x) for x in columns])

    # Serialize a standard scaler to a bundle
    scaler = StandardScaler(with_mean=True, with_std=True)
    scaler.mlinit(prior_tf=extractor,
                  output_features=['a_scaled', 'b_scaled'])
    scaler.fit(self.df[['a', 'b']])
    scaler.serialize_to_bundle(self.tmp_dir, scaler.name)

    # Now deserialize it back
    node_name = "{}.node".format(scaler.name)
    restored = StandardScaler()
    restored = restored.deserialize_from_bundle(self.tmp_dir, node_name)

    # Transform some sample data
    original_out = scaler.transform(self.df[['a', 'b']])
    restored_out = restored.transform(self.df[['a', 'b']])

    # Outputs on the first row must agree column-for-column.
    for col in (0, 1):
        self.assertEqual(original_out[0][col], restored_out[0][col])

    # Metadata and fitted statistics survive the round trip unchanged.
    self.assertEqual(scaler.name, restored.name)
    self.assertEqual(scaler.op, restored.op)
    for i in (0, 1):
        self.assertEqual(scaler.mean_[i], restored.mean_[i])
        self.assertEqual(scaler.scale_[i], restored.scale_[i])