def _test_model(self, model, num_samples=1, mode='random', delta=1e-2,
                model_dir=None, transpose_keras_result=True,
                one_dim_seq_flags=None,
                model_precision=_MLMODEL_FULL_PRECISION):
    """Convert a Keras model to Core ML, quantize/dequantize its weights,
    and compare the quantized model against the dequantized baseline.

    The converted spec is quantized to ``self.qbits`` with ``self.qmode``
    (and optional ``self.custom_lut``) and then de-quantized; that
    round-tripped spec is used as the "full precision" reference because
    quantizing it again introduces zero additional quantization error.

    Parameters
    ----------
    model : keras model to convert and test.
    num_samples : unused here; kept for interface compatibility with callers.
    mode : input-generation mode passed to the Keras test helper.
    delta : numeric tolerance forwarded to ``_run_quantized_test``.
    model_dir : optional scratch directory; a temp dir is created (and
        removed afterwards) when None.
    transpose_keras_result : unused here; kept for interface compatibility.
    one_dim_seq_flags : forwarded to the Keras test helper.
    model_precision : precision flag for the initial Core ML conversion.
    """
    # Create a scratch directory only if the caller did not provide one;
    # remember whether we own it so we only delete what we created.
    use_tmp_folder = False
    if model_dir is None:
        use_tmp_folder = True
        model_dir = tempfile.mkdtemp()

    try:
        # Get converted coreml model and sample input from the shared
        # Keras test harness.
        (input_names, output_names, _,
         coreml_input) = self.keras_tester._get_coreml_model_params_and_test_input(
             model, mode, one_dim_seq_flags)
        from test_keras2_numeric import _get_coreml_model
        coreml_model = _get_coreml_model(model, input_names, output_names,
                                         model_precision=model_precision)

        # Quantize the spec, then de-quantize it. The round-tripped spec
        # serves as the full-precision reference: quantizing it again
        # yields zero quantization error.
        coreml_spec = coreml_model.get_spec()
        quantization_utils._quantize_spec_weights(
            spec=coreml_spec,
            nbits=self.qbits,
            quantization_mode=self.qmode,
            lut_function=self.custom_lut)
        quantization_utils._dequantize_nn_spec(spec=coreml_spec.neuralNetwork)
        full_precision_model_spec = coreml_spec

        # Quantize a fresh copy of the spec to produce the model under test.
        quantized_model_spec = quantization_utils._quantize_spec_weights(
            spec=coreml_model.get_spec(),
            nbits=self.qbits,
            quantization_mode=self.qmode,
            lut_function=self.custom_lut)

        full_precision_model = coremltools.models.MLModel(
            full_precision_model_spec)
        quantized_model = coremltools.models.MLModel(quantized_model_spec)

        self._run_quantized_test(coreml_input, full_precision_model,
                                 quantized_model, delta)
    finally:
        # Clean up the temp dir even if conversion/quantization raised,
        # so repeated test runs do not leak scratch directories.
        if use_tmp_folder and os.path.exists(model_dir):
            shutil.rmtree(model_dir)
def test_batched_matmul_1bit_weight_quantized(self):
    """Linear 1-bit quantization of a batched-matmul layer's weights
    should stay within the comparison tolerance of ``self.compare``."""
    self.initialize()
    # Build a single batched matmul layer whose weight matrix will be
    # quantized in place.
    layer_kwargs = dict(
        name="batched_matmul",
        input_names=["data"],
        output_name="output",
        weight_matrix_rows=self.Cin,
        weight_matrix_columns=self.Cout,
        W=self.W,
    )
    self.builder.add_batched_mat_mul(**layer_kwargs)
    # Quantize to a single bit using linear quantization, then verify.
    _quantize_spec_weights(self.builder.spec, 1,
                           _QUANTIZATION_MODE_LINEAR_QUANTIZATION)
    self.compare()