def test_embeddingND_quantize(self):
    input_features = [("data", datatypes.Array(10, 1))]
    output_features = [("output", None)]
    builder = neural_network.NeuralNetworkBuilder(
        input_features, output_features, disable_rank5_shape_mapping=True)
    builder.add_embedding_nd(
        name="embedding_nd",
        input_name="data",
        output_name="output",
        vocab_size=300,
        embedding_size=20,
        W=np.random.rand(20, 300),
    )
    spec = builder.spec
    model_fp32 = coremltools.models.MLModel(spec)
    self.assertEqual(
        len(spec.neuralNetwork.layers[0].embeddingND.weights.floatValue), 6000)

    # quantize to FP16
    model_fp16 = quantization_utils.quantize_weights(model_fp32, nbits=16)
    spec_fp16 = model_fp16.get_spec()
    self.assertEqual(
        len(spec_fp16.neuralNetwork.layers[0].embeddingND.weights.floatValue), 0)
    self.assertEqual(
        len(spec_fp16.neuralNetwork.layers[0].embeddingND.weights.float16Value),
        2 * 6000,
    )

    # quantize to uint8
    model_uint8 = quantization_utils.quantize_weights(model_fp32, nbits=8)
    spec_uint8 = model_uint8.get_spec()
    self.assertEqual(
        len(spec_uint8.neuralNetwork.layers[0].embeddingND.weights.floatValue), 0)
    self.assertEqual(
        len(spec_uint8.neuralNetwork.layers[0].embeddingND.weights.float16Value), 0)
    self.assertEqual(
        len(spec_uint8.neuralNetwork.layers[0].embeddingND.weights.rawValue), 6000)

    # quantize to uint5
    model_uint5 = quantization_utils.quantize_weights(model_fp32, nbits=5)
    spec_uint5 = model_uint5.get_spec()
    self.assertEqual(
        len(spec_uint5.neuralNetwork.layers[0].embeddingND.weights.floatValue), 0)
    self.assertEqual(
        len(spec_uint5.neuralNetwork.layers[0].embeddingND.weights.float16Value), 0)
    self.assertEqual(
        len(spec_uint5.neuralNetwork.layers[0].embeddingND.weights.rawValue), 3750)  # 3750 = 5 * 6000 / 8
def test_nn_partial_fp16_make_updatable_fail(self):
    nn_builder = self.create_base_builder()
    model_path = os.path.join(self.model_dir, "updatable_creation.mlmodel")
    print(model_path)
    save_spec(nn_builder.spec, model_path)
    mlmodel = MLModel(model_path)

    # fails since updatable models cannot get quantized to FP16
    with self.assertRaises(Exception):
        quantization_utils.quantize_weights(mlmodel, 16, "linear")
def main(args):
    if args.type == 'FLOAT32':
        if args.model_dir[-3:] != '.pb':
            print("Error: the model type must be a .pb file")
            return
        coreml_model = tfcoreml.convert(
            tf_model_path=args.model_dir,
            mlmodel_path=args.output_file,
            input_name_shape_dict={'input': [1, 160, 160, 3]},
            output_feature_names=["embeddings"],
            minimum_ios_deployment_target='13')
    else:
        if args.model_dir[-8:] != '.mlmodel':
            print("Error: the model type must be a .mlmodel file")
            return
        if args.type == 'FLOAT16':
            model_spec = coremltools.utils.load_spec(args.model_dir)
            model_fp16_spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(
                model_spec)
            coremltools.utils.save_spec(model_fp16_spec, args.output_file)
        else:
            model = coremltools.models.MLModel(args.model_dir)
            bit = int(args.type[-1])
            print("quantization in INT" + str(bit))
            quantized_model = quantization_utils.quantize_weights(
                model, bit, "linear")
            quantized_model.save(args.output_file)
    # Only reached on success: every error path returns early above.
    print('File correctly saved in:', args.output_file)
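# Hypothetical sketch (not part of the original script) of how main() above might be
# wired up. It assumes an argparse CLI exposing --model_dir, --output_file and --type,
# where --type is one of FLOAT32, FLOAT16, or INT1..INT8; the flag names match the
# attributes main() reads from args.
import argparse

def _parse_args():
    parser = argparse.ArgumentParser(description="Convert and optionally quantize a model")
    parser.add_argument('--model_dir', required=True,
                        help="path to the input .pb or .mlmodel file")
    parser.add_argument('--output_file', required=True,
                        help="path where the converted model is written")
    parser.add_argument('--type', default='FLOAT32',
                        help="FLOAT32, FLOAT16, or INTn with n in 1..8")
    return parser.parse_args()

if __name__ == '__main__':
    main(_parse_args())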
def test_linear_quant_batchedmatmul_8bit(self):
    np.random.seed(1988)
    W = np.random.rand(32, 32) * 2.0 - 1
    bias = np.random.rand(32)
    input_features = [("data", datatypes.Array(2, 32))]
    output_features = [("out", None)]
    builder = NeuralNetworkBuilder(
        input_features, output_features, disable_rank5_shape_mapping=True
    )
    builder.add_batched_mat_mul(
        name="batched_matmul",
        input_names=["data"],
        output_name="out",
        weight_matrix_rows=32,
        weight_matrix_columns=32,
        W=W,
        bias=bias,
    )
    mlmodel = MLModel(builder.spec)
    q_mlmodel = quantize_weights(mlmodel, 8)
    q_spec = q_mlmodel.get_spec()
    q_layer = q_spec.neuralNetwork.layers[0].batchedMatmul
    self.assertTrue(len(q_layer.weights.floatValue) == 0)
    self.assertTrue(len(q_layer.weights.rawValue) > 0)

    data = np.random.rand(2, 32)
    data_dict = {"data": data}
    out = q_mlmodel.predict(data_dict, useCPUOnly=True)["out"]
    expected_out = np.matmul(data, W) + bias
    self.assertTrue(out.shape == expected_out.shape)
    self.assertTrue(np.allclose(out.flatten(), expected_out.flatten(), atol=0.1))
def convert(model: Model,
            model_name=None,
            nbits=32,
            quantization_mode="linear",
            class_labels=["stay", "walk", "jog", "skip", "stUp", "stDown"]):
    # add reshape layer
    model = add_reshape_layer(model)

    classifier_config = ct.ClassifierConfig(class_labels=class_labels)
    mlmodel = ct.convert(model, classifier_config=classifier_config)

    # Quantization options
    available_options = list(range(1, 9)) + [16]
    if nbits in available_options:
        mlmodel = quantization_utils.quantize_weights(
            mlmodel, nbits=nbits, quantization_mode=quantization_mode)

    # Add description
    if model_name is not None:
        # if model is quantized
        if nbits in available_options:
            if nbits == 16:
                model_name = "{}, float {} bit".format(model_name, nbits)
            else:
                model_name = "{}, int {} bit".format(model_name, nbits)
        mlmodel.short_description = "Activity Classifier ({})".format(model_name)
        mlmodel.input_description["input"] = "Input acceleration data to be classified"
        mlmodel.output_description["classLabel"] = "Most likely activity"
        mlmodel.output_description["Identity"] = "Probability of each activity"

    return mlmodel
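# Hypothetical usage of convert() above (file names are made up): load a Keras
# activity-classifier, quantize its weights to 8 bits, and save the Core ML model.
import tensorflow as tf

keras_model = tf.keras.models.load_model("har_model.h5")  # assumed Keras model
mlmodel = convert(keras_model, model_name="HAR", nbits=8)
mlmodel.save("HAR_int8.mlmodel")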
def onnx_to_coreml(model_name: str, half_precision: bool, quarter_precision: bool,
                   return_or_save: str = 'save'):
    """
    Arguments
    ---------
    model_name: str
        filename of the model to convert
    half_precision: bool
        whether to convert the coreml model to half precision
    quarter_precision: bool
        whether to convert the coreml model to quarter precision
    return_or_save: str == "save" or "return"
        if "save", the model will be saved as the model_name.
        if "return", the model objects are returned

    Returns
    -------
    (onnx_model, coreml_model)
        if return_or_save == 'return', the onnx and coreml model objects are returned.
        otherwise, None.
    """
    assert return_or_save in ['save', 'return'], \
        f"return_or_save must be 'save' or 'return'. {return_or_save} entered."
    assert not (half_precision and quarter_precision), \
        "half-precision and quarter-precision flags can't both be used during the same call."

    onnx_path = os.path.join("onnx_models", model_name + "_model.onnx")
    coreml_path = os.path.join("coreml_models", model_name + "_model.mlmodel")

    onnx_model = onnx.load(onnx_path)
    coreml_model = convert(model=onnx_model, minimum_ios_deployment_target='13')

    if half_precision:
        coreml_model = quantization_utils.quantize_weights(coreml_model, nbits=16)
        print("\n~~~~ Converted CoreML Model to half precision ~~~~\n")
    elif quarter_precision:
        coreml_model = quantization_utils.quantize_weights(coreml_model, nbits=8)
        print("\n~~~~ Converted CoreML Model to quarter precision ~~~~\n")
    else:
        print("\n~~~~ CoreML Model kept at single precision ~~~~\n")

    if return_or_save == 'save':
        coreml_model.save(coreml_path)
        print(f"Onnx model successfully converted to CoreML at: {coreml_path}")
    elif return_or_save == 'return':
        return onnx_model, coreml_model
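# Hypothetical usage of onnx_to_coreml() above (the model name is made up): converts
# onnx_models/mobilenet_model.onnx, quantizes its weights to 8 bits, and writes
# coreml_models/mobilenet_model.mlmodel.
onnx_to_coreml("mobilenet", half_precision=False, quarter_precision=True,
               return_or_save='save')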
def test_8bit_symmetric_and_skips(self):
    from keras.models import Sequential
    from keras.layers import Conv2D

    def stable_rel_error(x, ref):
        err = x - ref
        denom = np.maximum(np.abs(ref), np.ones_like(ref))
        return np.abs(err) / denom

    np.random.seed(1988)
    input_dim = 16
    num_kernels, kernel_height, kernel_width, input_channels = 64, 3, 3, 32

    # Define a model
    model = Sequential()
    model.add(
        Conv2D(input_shape=(input_dim, input_dim, input_channels),
               filters=num_kernels,
               kernel_size=(kernel_height, kernel_width)))

    # Set some random weights
    weight, bias = model.layers[0].get_weights()
    num_filters = weight.shape[-1]
    filter_shape = weight.shape[:-1]
    new_weight = np.stack(
        [4.0 * np.random.rand(*filter_shape) - 2 for i in range(num_filters)],
        axis=-1)
    model.layers[0].set_weights([new_weight, bias])

    mlmodel = keras_converter.convert(model, ['data'], ['output_0'])

    selector = quantization_utils.AdvancedQuantizedLayerSelector(
        skip_layer_types=['batchnorm', 'bias', 'depthwiseConv'],
        minimum_conv_kernel_channels=4,
        minimum_conv_weight_count=4096)
    q_mlmodel = quantization_utils.quantize_weights(mlmodel, 8, selector=selector)

    input_shape = (1, 1, input_channels, input_dim, input_dim)
    input_val = 2 * np.random.rand(*input_shape) - 1
    coreml_input = {'data': input_val}
    coreml_output = mlmodel.predict(coreml_input)
    q_coreml_output = q_mlmodel.predict(coreml_input)

    val = coreml_output['output_0']
    q_val = q_coreml_output['output_0']
    rel_err = stable_rel_error(q_val, val)
    max_rel_err, mean_rel_err = np.max(rel_err), np.mean(rel_err)
    self.assertTrue(max_rel_err < 0.25)
    self.assertTrue(max_rel_err > 0.01)
    self.assertTrue(mean_rel_err < 0.02)
def preprocess(script_module: torch._C.ScriptObject, compile_spec: Dict[str, Tuple]):
    spec = compile_spec["forward"]
    input_specs, output_specs, backend, allow_low_precision, quantization_mode = spec
    mil_inputs = []
    inputs = []
    for index, input in enumerate(input_specs):
        shape, dtype = input
        name = "input_" + str(index)
        inputs.append([name, str(dtype), str(shape)])
        ml_type = _convert_to_mil_type(shape, dtype, name)
        mil_inputs.append(ml_type)
    model = torch.jit.RecursiveScriptModule._construct(script_module, lambda x: None)
    mlmodel = ct.convert(model, inputs=mil_inputs)

    if quantization_mode != CoreMLQuantizationMode.NONE:
        quant_model_spec = quantization_utils.quantize_weights(
            mlmodel, nbits=8, quantization_mode=quantization_mode)
        mlmodel = ct.models.MLModel(quant_model_spec)

    spec = mlmodel.get_spec()
    assert len(spec.description.output) == len(output_specs)  # type: ignore[attr-defined]
    outputs = []
    for index, output in enumerate(output_specs):
        shape, dtype = output
        name = spec.description.output[index].name  # type: ignore[attr-defined]
        outputs.append([name, str(dtype), str(shape)])
    mlmodel = ct.models.model.MLModel(spec)
    print(mlmodel)
    config = {
        "spec_ver": str(spec.specificationVersion),  # type: ignore[attr-defined]
        "backend": backend,
        "allow_low_precision": str(allow_low_precision),
    }
    metadata = {
        "coremltool_ver": mlmodel.user_defined_metadata[CT_METADATA_VERSION],
        "torch_ver": mlmodel.user_defined_metadata[CT_METADATA_SOURCE],
    }
    coreml_compile_spec = {
        "inputs": inputs,
        "outputs": outputs,
        "config": config,
        "metadata": metadata,
    }
    mlmodel = spec.SerializeToString()  # type: ignore[attr-defined]

    return {
        "model": mlmodel,
        "hash": str(hashlib.sha256(mlmodel).hexdigest()),
        "extra": json.dumps(coreml_compile_spec),
    }
def test_nn_fp16_make_updatable_fail(self):
    nn_builder = self.create_base_builder(is_updatable=False)
    model_path = os.path.join(self.model_dir, "updatable_creation.mlmodel")
    print(model_path)
    save_spec(nn_builder.spec, model_path)

    mlmodel = MLModel(model_path)
    quantized_model = quantization_utils.quantize_weights(mlmodel, 16, "linear")
    q_nn_builder = NeuralNetworkBuilder(spec=quantized_model._spec)

    # fails since an FP16 model cannot be marked updatable
    with self.assertRaises(ValueError):
        q_nn_builder.make_updatable(["ip1", "ip2"])
def test_nn_partial_fp16_make_updatable_quantized_layer_fail(self):
    nn_builder = self.create_base_builder(is_updatable=False)
    model_path = os.path.join(self.model_dir, "updatable_creation.mlmodel")
    print(model_path)
    save_spec(nn_builder.spec, model_path)

    mlmodel = MLModel(model_path)
    selector = LayerSelector(layer_name='ip2')
    quantized_model = quantization_utils.quantize_weights(
        mlmodel, 16, "linear", selector=selector)
    q_nn_builder = NeuralNetworkBuilder(spec=quantized_model._spec)

    # fails since model has a layer with FP16 bias
    with self.assertRaises(ValueError):
        q_nn_builder.make_updatable(["ip2"])
"--model", required=True, help="path to trained model model") args = vars(ap.parse_args()) print("[INFO] loading model...") model = load_model(args["model"]) print("[INFO] converting model...") mlmodel = ct.convert(model) spec = mlmodel.get_spec() ct.utils.rename_feature(spec, 'Identity', 'confidence') ct.utils.rename_feature(spec, 'conv2d_input', 'image') mlmodel = ct.models.MLModel(spec) mlmodel.author = 'xRapid Group' mlmodel.license = 'Private Use' mlmodel.short_description = 'Classifies RDTs.' mlmodel.version = '1.0.0' mlmodel.input_description[ 'image'] = 'Image. Grayscale. Normalised. Shape: (1, 256, 256, 1). Type: float32.' mlmodel.output_description[ 'confidence'] = '0=negative. 1=positive. Shape: (1). Type: float32.' print('[INFO] quantizing model...') mlmodel = quantization_utils.quantize_weights(mlmodel, nbits=8) print('[INFO] saving model...') mlmodel.save('covidNet.mlmodel')
import coremltools
from coremltools.models.neural_network import quantization_utils

model = coremltools.models.MLModel('emnist_model1.mlmodel')
quantized_model = quantization_utils.quantize_weights(model, 8, "linear")
#coremltools.utils.save_spec(quantized_model, 'emnist_model1_FP8.mlmodel')
quantized_model.save('emnist_model1_FP8.mlmodel')
print('Done!')

model = coremltools.models.MLModel('emnist_model2.mlmodel')
quantized_model = quantization_utils.quantize_weights(model, 8, "linear")
#coremltools.utils.save_spec(quantized_model, 'emnist_model2_FP8.mlmodel')
quantized_model.save('emnist_model2_FP8.mlmodel')
print('Done!')

model = coremltools.models.MLModel('emnist_model3.mlmodel')
quantized_model = quantization_utils.quantize_weights(model, 8, "linear")
#coremltools.utils.save_spec(quantized_model, 'emnist_model3_FP8.mlmodel')
quantized_model.save('emnist_model3_FP8.mlmodel')
print('Done!')

model = coremltools.models.MLModel('emnist_model4.mlmodel')
quantized_model = quantization_utils.quantize_weights(model, 8, "linear")
#coremltools.utils.save_spec(quantized_model, 'emnist_model4_FP8.mlmodel')
quantized_model.save('emnist_model4_FP8.mlmodel')
print('Done!')

model = coremltools.models.MLModel('emnist_model5.mlmodel')
quantized_model = quantization_utils.quantize_weights(model, 8, "linear")
#coremltools.utils.save_spec(quantized_model, 'emnist_model5_FP8.mlmodel')
quantized_model.save('emnist_model5_FP8.mlmodel')
download_blob(SOURCE_BUCKET, SOURCE_MODEL_PATH, '/tmp/model.h5')
download_blob(SOURCE_BUCKET, SOURCE_LABELS_PATH, '/tmp/labels.txt')

# Convert h5 model to coreml
OUTPUT_NAME = ['Identity']
MODEL_LABELS = '/tmp/labels.txt'

model = tfcoreml.convert(
    './tmp/kiosk_model.h5',
    image_input_names=['input_1'],
    input_name_shape_dict={'input_1': (1, 224, 224, 3)},
    output_feature_names=OUTPUT_NAME,
    minimum_ios_deployment_target='13',
    red_bias=-1,
    green_bias=-1,
    blue_bias=-1,
    is_bgr=True,
    image_scale=2.0 / 255.0,
)
model.save('/tmp/model.mlmodel')

# Create quantised version of coreml model
model = coremltools.models.MLModel('/tmp/model.mlmodel')
quantized_model = quantize_weights(model, nbits=8, quantization_mode="linear")
quantized_model.save('/tmp/model_quant.mlmodel')

upload_blob(DESTINATION_BUCKET, '/tmp/model.mlmodel',
            DESTINATION_DIRECTORY + '/model.mlmodel')
upload_blob(DESTINATION_BUCKET, '/tmp/model_quant.mlmodel',
            DESTINATION_DIRECTORY + '/model_quant.mlmodel')
import coremltools
from coremltools.models.neural_network.quantization_utils import quantize_weights
import sys

model_in = sys.argv[1]
names = model_in.split(".")
model_out = names[0] + "_quantized." + names[1]

# If the OS is not macOS (or an old macOS),
# quantize_weights() returns a spec rather than a model.
model = coremltools.models.MLModel(model_in)
n_bits = 8
mode = "kmeans"
try:
    quantized_spec = quantize_weights(model, n_bits, mode)
    coremltools.utils.save_spec(quantized_spec, model_out)
except Exception as err:
    print("macOS version: ", coremltools.models.utils.macos_version())
    print(err)
    quantized_model = quantize_weights(model, n_bits, mode)
    coremltools.utils.save_spec(quantized_model.spec, model_out)
# Convert the model
mlmodel = ct.convert(
    trace,
    inputs=[ct.ImageType(name="__input", shape=dummy_input.shape)],
)
spec = mlmodel.get_spec()

# Edit the spec
ct.utils.rename_feature(spec, '__input', 'image')
ct.utils.rename_feature(spec, '2577', 'output')

# Save out the updated model
mlmodel = ct.models.MLModel(spec)
print(mlmodel)

from coremltools.models.neural_network import quantization_utils
from coremltools.models.neural_network.quantization_utils import AdvancedQuantizedLayerSelector

selector = AdvancedQuantizedLayerSelector(
    skip_layer_types=['batchnorm', 'bias', 'depthwiseConv'],
    minimum_conv_kernel_channels=4,
    minimum_conv_weight_count=4096)

# Note: despite the "fp16" variable names below, this quantizes the weights to 8 bits.
model_fp16 = quantization_utils.quantize_weights(mlmodel,
                                                 nbits=8,
                                                 quantization_mode='linear',
                                                 selector=selector)

fp_16_file = './centernet.mlmodel'
model_fp16.save(fp_16_file)
def _test_tf_model(
        self,
        graph,
        input_shapes,
        output_node_names,
        data_mode='random',
        input_refs=None,
        delta=1e-2,
        use_cpu_only=False,
        graph_optimizations="freeze",  # one of ["freeze", "convert_variables_to_constants", None]
        quantize_tf_model=False,
        quantize_mlmodel=False,
        quantize_config={}):
    """
    Common entry to testing routine.
    graph - defined TensorFlow graph.
    input_shapes - dict str:shape for each input op (placeholder)
    output_node_names - output_node_names, a list of strings
    data_mode - auto-generated input vectors, can be 'random', 'zeros', 'ones', 'linear', etc.
    input_refs - a dictionary of reference inputs in TensorFlow axis order, each entry is str:shape.
        When using auto-generated input vectors, set input_refs to None.
    delta - maximum difference of normalized TensorFlow and CoreML outputs
    use_cpu_only - If True, instantiate and run the CoreML model with CPU only
    graph_optimizations == "freeze" - Force the TensorFlow graph to be frozen before converting.
    quantize_tf_model - If True, try to quantize the TensorFlow model before converting
    quantize_mlmodel - If True, quantize the mlmodel after converting.
    quantize_config - Dictionary with test quantization parameters
    """
    # Some file processing
    model_dir = tempfile.mkdtemp()
    graph_def_file = os.path.join(model_dir, 'tf_graph.pb')
    checkpoint_file = os.path.join(model_dir, 'tf_model.ckpt')
    static_model_file = os.path.join(model_dir, 'tf_static.pb')
    coreml_model_file = os.path.join(model_dir, 'coreml_model.mlmodel')

    # add a saver
    tf.reset_default_graph()
    if graph_optimizations == "freeze":
        with graph.as_default() as g:
            saver = tf.train.Saver()

    if input_refs is None:
        feed_dict = {
            self._get_tf_tensor_name(graph, name):
                generate_data(input_shapes[name], data_mode)
            for name in input_shapes
        }
    else:
        feed_dict = {
            self._get_tf_tensor_name(graph, name): input_refs[name]
            for name in list(input_refs.keys())
        }

    with tf.Session(graph=graph) as sess:
        # initialize
        initializer_op = tf.global_variables_initializer()
        sess.run(initializer_op)
        # run the result
        fetches = [
            graph.get_operation_by_name(name).outputs[0]
            for name in output_node_names
        ]
        result = sess.run(fetches, feed_dict=feed_dict)
        # save graph definition somewhere
        tf.train.write_graph(sess.graph, model_dir, graph_def_file, as_text=False)
        # save the weights if freezing is needed
        if not graph_optimizations:
            static_model_file = graph_def_file
        elif graph_optimizations == "freeze":
            saver.save(sess, checkpoint_file)
            self._simple_freeze(
                input_graph=graph_def_file,
                input_checkpoint=checkpoint_file,
                output_graph=static_model_file,
                output_node_names=",".join(output_node_names))
        else:
            output_graph_def = tf.graph_util.convert_variables_to_constants(
                sess, graph.as_graph_def(), output_node_names)
            with tf.gfile.GFile(static_model_file, "wb") as f:
                f.write(output_graph_def.SerializeToString())

    # if TF needs to be quantized, quantize the graph
    if quantize_tf_model:
        static_model_file = self._quantize_static_tf_model(
            model_dir, static_model_file, output_node_names)

    # convert to CoreML
    mlmodel = coremltools.converters.tensorflow.convert(
        static_model_file,
        inputs=input_shapes,
        outputs=output_node_names,
        use_cpu_only=use_cpu_only)

    # Quantize MLModel if needed
    if quantize_mlmodel:
        from coremltools.models.neural_network.quantization_utils import quantize_weights
        nbits = quantize_config['nbits']
        mode = quantize_config['mode']
        mlmodel = quantize_weights(mlmodel, nbits, quantization_mode=mode)

    if DEBUG:
        print('\n mlmodel description: \n')
        from coremltools.models.neural_network.printer import print_network_spec
        print_network_spec(mlmodel.get_spec(), style='coding')
        mlmodel.save(coreml_model_file)
        print('\n mlmodel saved at %s' % coreml_model_file)

    coreml_input_names = [str(x) for x in mlmodel.input_description]
    coreml_input_shapes = _parse_coreml_input_shapes(mlmodel)

    # Transpose input data as CoreML requires
    coreml_inputs = {}
    for name in coreml_input_names:
        tfop_name = _parse_coreml_name_to_tf(name)
        if tfop_name in input_shapes:
            coreml_inputs[name] = tf_transpose(
                feed_dict[self._get_tf_tensor_name(graph, tfop_name)])
        else:
            coreml_inputs[name] = np.zeros(coreml_input_shapes[name])

    # Run predict in CoreML
    coreml_output = mlmodel.predict(coreml_inputs, useCPUOnly=use_cpu_only)

    for idx, out_name in enumerate(output_node_names):
        tf_out = result[idx]
        if len(tf_out.shape) == 0:
            tf_out = np.array([tf_out])
        tp = tf_out.flatten()
        if out_name in coreml_output:
            coreml_out = coreml_output[out_name]
        elif out_name + '__outvar__' in coreml_output:
            coreml_out = coreml_output[out_name + '__outvar__']
        else:
            self.assertTrue(False, 'CoreML output not found')
        cp = coreml_out.flatten()
        self.assertTrue(tf_out.shape == coreml_out.shape)
        for i in range(len(tp)):
            max_den = max(1.0, tp[i], cp[i])
            self.assertAlmostEqual(tp[i] / max_den, cp[i] / max_den, delta=delta)

    # Cleanup files - models on disk no longer useful
    if os.path.exists(model_dir):
        shutil.rmtree(model_dir)
# Set feature descriptions (these show up as comments in Xcode)
ctmodel.input_description["drawing"] = "Input drawing to be classified"
ctmodel.output_description["classLabel"] = "Most likely symbol"
ctmodel.output_description["classLabelProbs"] = "Probability scores for each symbol"

# Set model author name
ctmodel.author = "Venkata S Govindarajan"

# Set the license of the model
ctmodel.license = "MIT License"

# Set a short description for the Xcode UI
ctmodel.short_description = "Detects the most likely LaTeX mathematical symbol \
corresponding to a drawing."

# Set a version for the model
ctmodel.version = "0.95"

# Save model
ctmodel.save("deTeX.mlmodel")

# Quantisation to an FP16 model reduces the size by half without (supposedly)
# affecting accuracy
ctmodel_fp16 = quantization_utils.quantize_weights(ctmodel, nbits=16)
ctmodel_fp16.save("deTeX16.mlmodel")

ctmodel_fp8 = quantization_utils.quantize_weights(ctmodel, nbits=8)
ctmodel_fp8.save("deTeX8.mlmodel")
import sys

import coremltools as ct
from coremltools.models.neural_network import quantization_utils

if len(sys.argv) != 3:
    print("USAGE: %s <input_mlmodel> <output_mlmodel>" % sys.argv[0])
    sys.exit(1)

input_model_path = sys.argv[1]
output_model_path = sys.argv[2]

# coremltools 3 version:
#spec = coremltools.utils.load_spec(input_model_path)
#spec_fp16 = coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec)
#coremltools.utils.save_spec(spec_fp16, output_model_path)

# coremltools 4 version:
model = ct.models.MLModel(input_model_path)
model_fp16 = quantization_utils.quantize_weights(model, nbits=16)
model_fp16.save(output_model_path)
# The mode argument should be one of:
#   linear
#   kmeans
#   dequantization
#
# The number of bits should be between 1 and 8.

import sys

import coremltools as ct
from coremltools.models.neural_network import quantization_utils

if len(sys.argv) < 4:
    print("USAGE: %s <input_mlmodel> <output_mlmodel> <mode> [bits]" % sys.argv[0])
    sys.exit(1)

input_model_path = sys.argv[1]
output_model_path = sys.argv[2]
mode = sys.argv[3]
nbits = int(sys.argv[4]) if len(sys.argv) > 4 else 8

model = ct.models.MLModel(input_model_path)
quant_model = quantization_utils.quantize_weights(model, nbits, mode)
quant_model.save(output_model_path)
import torch
import sys
import numpy as np
from model import Net
import coremltools as ct
from coremltools.models.neural_network import quantization_utils

model_in = sys.argv[1]
label_count = sys.argv[2]

model = Net(output_label_count=int(label_count))
model.load_state_dict(torch.load(model_in))
model.cpu()   # move model to cpu
model.eval()  # switch to eval mode

random_input = torch.rand(1, 1, 98, 40)
traced_model = torch.jit.trace(model, random_input, check_trace=False)

print("converting PyTorch model to Core ML model")
converted_model = ct.convert(
    traced_model,  # convert using the Unified Conversion API
    inputs=[ct.TensorType(shape=random_input.shape)])
print("conversion is complete, saving to disk")

# allowed values of nbits = 16, 8, 7, 6, ..., 1
quantized_model = quantization_utils.quantize_weights(converted_model, 8)

converted_model.save(model_in.replace(".pymodel", "") + ".mlmodel")
quantized_model.save(model_in.replace(".pymodel", "_quantized") + ".mlmodel")
MacOS is REQUIRED for quantization.
"""
import os

import coremltools as ct
import tensorflow as tf
from coremltools.models.neural_network import quantization_utils

if __name__ == "__main__":
    # Converted model will be exported here.
    export_dir = "./mlmodels"
    if not os.path.exists(export_dir):
        os.mkdir(export_dir)

    # Restore the model.
    model = tf.keras.models.load_model("./exported")

    # Do the conversion.
    mlmodel = ct.convert(model)
    mlmodel.save("./mlmodels/hrnetv2_fp32.mlmodel")

    # Quantization: FP16
    model_fp16 = quantization_utils.quantize_weights(mlmodel, nbits=16)
    model_fp16.save("./mlmodels/hrnetv2_fp16.mlmodel")

    # Quantization: INT8
    model_int8 = quantization_utils.quantize_weights(mlmodel, nbits=8)
    model_int8.save("./mlmodels/model_int8.mlmodel")