Example No. 1
    def test_embeddingND_quantize(self):
        input_features = [("data", datatypes.Array(10, 1))]
        output_features = [("output", None)]
        builder = neural_network.NeuralNetworkBuilder(
            input_features, output_features, disable_rank5_shape_mapping=True)

        builder.add_embedding_nd(
            name="embedding_nd",
            input_name="data",
            output_name="output",
            vocab_size=300,
            embedding_size=20,
            W=np.random.rand(20, 300),
        )

        spec = builder.spec
        model_fp32 = coremltools.models.MLModel(spec)
        self.assertEqual(
            len(spec.neuralNetwork.layers[0].embeddingND.weights.floatValue),
            6000)

        # quantize to FP16
        model_fp16 = quantization_utils.quantize_weights(model_fp32, nbits=16)
        spec_fp16 = model_fp16.get_spec()
        weights_fp16 = spec_fp16.neuralNetwork.layers[0].embeddingND.weights
        self.assertEqual(len(weights_fp16.floatValue), 0)
        self.assertEqual(len(weights_fp16.float16Value), 2 * 6000)

        # quantize to uint8
        model_uint8 = quantization_utils.quantize_weights(model_fp32, nbits=8)
        spec_uint8 = model_uint8.get_spec()
        weights_uint8 = spec_uint8.neuralNetwork.layers[0].embeddingND.weights
        self.assertEqual(len(weights_uint8.floatValue), 0)
        self.assertEqual(len(weights_uint8.float16Value), 0)
        self.assertEqual(len(weights_uint8.rawValue), 6000)

        # quantize to uint5
        model_uint5 = quantization_utils.quantize_weights(model_fp32, nbits=5)
        spec_uint5 = model_uint5.get_spec()
        weights_uint5 = spec_uint5.neuralNetwork.layers[0].embeddingND.weights
        self.assertEqual(len(weights_uint5.floatValue), 0)
        self.assertEqual(len(weights_uint5.float16Value), 0)
        self.assertEqual(len(weights_uint5.rawValue), 3750)  # 3750 = 5 * 6000 / 8
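The expected byte counts above follow directly from the weight count; a quick arithmetic sanity check (not part of the test itself):

n_weights = 300 * 20              # vocab_size * embedding_size = 6000 weights
fp16_bytes = 2 * n_weights        # 12000 bytes: two bytes per FP16 weight
uint8_bytes = 1 * n_weights       # 6000 bytes: one byte per 8-bit weight
uint5_bytes = 5 * n_weights // 8  # 3750 bytes: 5 bits per weight, packed into bytes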
Example No. 2
    def test_nn_partial_fp16_make_updatable_fail(self):
        nn_builder = self.create_base_builder()
        model_path = os.path.join(self.model_dir, "updatable_creation.mlmodel")
        print(model_path)
        save_spec(nn_builder.spec, model_path)
        mlmodel = MLModel(model_path)

        # fails since updatable models cannot get quantized to FP16
        with self.assertRaises(Exception):
            quantization_utils.quantize_weights(mlmodel, 16, "linear")
Example No. 3
def main(args):
    if args.type == 'FLOAT32':
        if args.model_dir[-3:] != '.pb':
            print("Error: the model must be a .pb file")
            return
        # tfcoreml writes the converted model directly to args.output_file
        tfcoreml.convert(
            tf_model_path=args.model_dir,
            mlmodel_path=args.output_file,
            input_name_shape_dict={'input': [1, 160, 160, 3]},
            output_feature_names=["embeddings"],
            minimum_ios_deployment_target='13')
    else:
        if args.model_dir[-8:] != '.mlmodel':
            print("Error: the model must be a .mlmodel file")
            return
        if args.type == 'FLOAT16':
            model_spec = coremltools.utils.load_spec(args.model_dir)
            model_fp16_spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(
                model_spec)
            coremltools.utils.save_spec(model_fp16_spec, args.output_file)
        else:
            model = coremltools.models.MLModel(args.model_dir)
            nbits = int(args.type[-1])
            print("Quantizing to INT{}".format(nbits))
            quantized_model = quantization_utils.quantize_weights(
                model, nbits, "linear")
            quantized_model.save(args.output_file)
    print('File saved to:', args.output_file)
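For reference, a minimal argparse setup that would produce the args object used above could look like the sketch below; the flag names and the list of INT choices are assumptions for illustration, not part of the original script:

import argparse

def parse_args():
    # Hypothetical CLI; the original only shows that args has
    # .model_dir, .output_file and .type attributes.
    parser = argparse.ArgumentParser(description="Convert or quantize a model for Core ML")
    parser.add_argument("--model_dir", required=True,
                        help="path to the input .pb or .mlmodel file")
    parser.add_argument("--output_file", required=True,
                        help="where to write the converted model")
    parser.add_argument("--type", default="FLOAT32",
                        choices=["FLOAT32", "FLOAT16", "INT8", "INT4", "INT2"],
                        help="target precision")
    return parser.parse_args()

if __name__ == "__main__":
    main(parse_args())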
Example No. 4
    def test_linear_quant_batchedmatmul_8bit(self):
        np.random.seed(1988)
        W = np.random.rand(32, 32) * 2.0 - 1
        bias = np.random.rand(32)

        input_features = [("data", datatypes.Array(2, 32))]
        output_features = [("out", None)]
        builder = NeuralNetworkBuilder(
            input_features, output_features, disable_rank5_shape_mapping=True
        )
        builder.add_batched_mat_mul(
            name="batched_matmul",
            input_names=["data"],
            output_name="out",
            weight_matrix_rows=32,
            weight_matrix_columns=32,
            W=W,
            bias=bias,
        )
        mlmodel = MLModel(builder.spec)
        q_mlmodel = quantize_weights(mlmodel, 8)
        q_spec = q_mlmodel.get_spec()
        q_layer = q_spec.neuralNetwork.layers[0].batchedMatmul

        self.assertTrue(len(q_layer.weights.floatValue) == 0)
        self.assertTrue(len(q_layer.weights.rawValue) > 0)

        data = np.random.rand(2, 32)
        data_dict = {"data": data}
        out = q_mlmodel.predict(data_dict, useCPUOnly=True)["out"]
        expected_out = np.matmul(data, W) + bias
        self.assertTrue(out.shape == expected_out.shape)
        self.assertTrue(np.allclose(out.flatten(), expected_out.flatten(), atol=0.1))
Example No. 5
def convert(model: Model,
            model_name=None,
            nbits=32,
            quantization_mode="linear",
            class_labels=["stay", "walk", "jog", "skip", "stUp", "stDown"]):
    # add reshape layer
    model = add_reshape_layer(model)

    classifier_config = ct.ClassifierConfig(class_labels=class_labels)
    mlmodel = ct.convert(model, classifier_config=classifier_config)

    # Quantization options
    available_options = list(range(1, 9)) + [16]
    if nbits in available_options:
        mlmodel = quantization_utils.quantize_weights(
            mlmodel, nbits=nbits, quantization_mode=quantization_mode)

    # Add description
    if model_name is not None:
        # if model is quantized
        if nbits in available_options:
            if nbits == 16:
                model_name = "{}, float {} bit".format(model_name, nbits)
            else:
                model_name = "{}, int {} bit".format(model_name, nbits)

        mlmodel.short_description = "Activity Classifier ({})".format(
            model_name)
        mlmodel.input_description[
            "input"] = "Input acceleration data to be classified"
        mlmodel.output_description["classLabel"] = "Most likely activity"
        mlmodel.output_description["Identity"] = "Probability of each activity"

    return mlmodel
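A usage sketch follows; the Keras model, its name, and the file paths are assumptions for illustration only:

import tensorflow as tf

# Hypothetical usage: quantize an activity-classifier model to 8-bit weights.
keras_model = tf.keras.models.load_model("activity_classifier.h5")  # assumed path
mlmodel = convert(keras_model, model_name="ActivityNet", nbits=8,
                  quantization_mode="linear")
mlmodel.save("ActivityNet_int8.mlmodel")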
Example No. 6
def onnx_to_coreml(model_name:str, half_precision:bool, quarter_precision:bool, return_or_save:str='save'):
    """
    Arguments
    ---------
    model_name: str
        filename of the model to convert
    half_precision: bool
        whether to convert the coreml model to half precision
    quarter_precision: bool
        whether to conver the coreml model to quarter precision
    return_or_save: str == "save" or "return"
        if "save", the model will saved as the model_name. if 'return' the model object is returned

    Returns
    -------
    coreml_model: coreml-model-object
        if return_or_save == 'return', the model object is returned. otherwise, None.
    """

    assert return_or_save in ['save', 'return'], \
        f"return_or_save must be 'save' or 'return'. {return_or_save} entered."

    assert not(half_precision and quarter_precision), \
        "half-precision and quarter-precision flags can't both be used during same call."
    
    onnx_path = os.path.join("onnx_models", model_name+"_model.onnx")
    coreml_path = os.path.join("coreml_models", model_name+"_model.mlmodel")

    onnx_model = onnx.load(onnx_path)

    coreml_model = convert(model=onnx_model,
                           minimum_ios_deployment_target='13')

    if half_precision:
        coreml_model = quantization_utils.quantize_weights(coreml_model, nbits=16)
        print("\n~~~~ Converted CoreML Model to half precision ~~~~\n")
    elif quarter_precision:
        coreml_model = quantization_utils.quantize_weights(coreml_model, nbits=8)
        print("\n~~~~ Converted CoreML Model to quarter precision ~~~~\n")
    else:
        print("\n~~~~ CoreML Model kept at single precision ~~~~\n")

    if return_or_save == 'save':
        coreml_model.save(coreml_path)
        print(f"Onnx model successfully converted to CoreML at: {coreml_path}")
    elif return_or_save == 'return':
        return onnx_model, coreml_model
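A usage sketch; the model name and output path below are hypothetical and assume onnx_models/mnist_model.onnx exists:

# Hypothetical invocation: convert "mnist" and keep the FP16 variant in memory.
onnx_model, coreml_model = onnx_to_coreml(
    "mnist",
    half_precision=True,
    quarter_precision=False,
    return_or_save="return",
)
coreml_model.save("coreml_models/mnist_fp16.mlmodel")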
Example No. 7
    def test_8bit_symmetric_and_skips(self):
        from keras.models import Sequential
        from keras.layers import Conv2D

        def stable_rel_error(x, ref):
            err = x - ref
            denom = np.maximum(np.abs(ref), np.ones_like(ref))
            return np.abs(err) / denom

        np.random.seed(1988)
        input_dim = 16
        num_kernels, kernel_height, kernel_width, input_channels = 64, 3, 3, 32

        # Define a model
        model = Sequential()
        model.add(
            Conv2D(input_shape=(input_dim, input_dim, input_channels),
                   filters=num_kernels,
                   kernel_size=(kernel_height, kernel_width)))

        # Set some random weights
        weight, bias = model.layers[0].get_weights()
        num_filters = weight.shape[-1]
        filter_shape = weight.shape[:-1]

        new_weight = np.stack([
            4.0 * np.random.rand(*filter_shape) - 2 for i in range(num_filters)
        ],
                              axis=-1)
        model.layers[0].set_weights([new_weight, bias])

        mlmodel = keras_converter.convert(model, ['data'], ['output_0'])
        selector = quantization_utils.AdvancedQuantizedLayerSelector(
            skip_layer_types=['batchnorm', 'bias', 'depthwiseConv'],
            minimum_conv_kernel_channels=4,
            minimum_conv_weight_count=4096)

        q_mlmodel = quantization_utils.quantize_weights(mlmodel,
                                                        8,
                                                        selector=selector)

        input_shape = (1, 1, input_channels, input_dim, input_dim)
        input_val = 2 * np.random.rand(*input_shape) - 1

        coreml_input = {'data': input_val}
        coreml_output = mlmodel.predict(coreml_input)
        q_coreml_output = q_mlmodel.predict(coreml_input)

        val = coreml_output['output_0']
        q_val = q_coreml_output['output_0']
        rel_err = stable_rel_error(q_val, val)
        max_rel_err, mean_rel_err = np.max(rel_err), np.mean(rel_err)
        self.assertTrue(max_rel_err < 0.25)
        self.assertTrue(max_rel_err > 0.01)
        self.assertTrue(mean_rel_err < 0.02)
Example No. 8
def preprocess(script_module: torch._C.ScriptObject, compile_spec: Dict[str, Tuple]):
    spec = compile_spec["forward"]
    input_specs, output_specs, backend, allow_low_precision, quantization_mode = spec
    mil_inputs = []
    inputs = []
    for index, input in enumerate(input_specs):
        shape, dtype = input
        name = "input_" + str(index)
        inputs.append([name, str(dtype), str(shape)])
        ml_type = _convert_to_mil_type(shape, dtype, name)
        mil_inputs.append(ml_type)
    model = torch.jit.RecursiveScriptModule._construct(script_module, lambda x: None)
    mlmodel = ct.convert(model, inputs=mil_inputs)

    if quantization_mode != CoreMLQuantizationMode.NONE:
        quant_model_spec = quantization_utils.quantize_weights(mlmodel, nbits=8, quantization_mode=quantization_mode)
        mlmodel = ct.models.MLModel(quant_model_spec)

    spec = mlmodel.get_spec()
    assert len(spec.description.output) == len(output_specs)  # type: ignore[attr-defined]
    outputs = []
    for index, output in enumerate(output_specs):
        shape, dtype = output
        name = spec.description.output[index].name  # type: ignore[attr-defined]
        outputs.append([name, str(dtype), str(shape)])
    mlmodel = ct.models.model.MLModel(spec)
    print(mlmodel)
    config = {
        "spec_ver": str(spec.specificationVersion),  # type: ignore[attr-defined]
        "backend": backend,
        "allow_low_precision": str(allow_low_precision),
    }
    metadata = {
        "coremltool_ver": mlmodel.user_defined_metadata[CT_METADATA_VERSION],
        "torch_ver": mlmodel.user_defined_metadata[CT_METADATA_SOURCE],
    }
    coreml_compile_spec = {
        "inputs": inputs,
        "outputs": outputs,
        "config": config,
        "metadata": metadata,
    }
    mlmodel = spec.SerializeToString()  # type: ignore[attr-defined]

    return {
        "model": mlmodel,
        "hash": str(hashlib.sha256(mlmodel).hexdigest()),
        "extra": json.dumps(coreml_compile_spec),
    }
Example No. 9
    def test_nn_fp16_make_updatable_fail(self):
        nn_builder = self.create_base_builder(is_updatable=False)
        model_path = os.path.join(self.model_dir, "updatable_creation.mlmodel")
        print(model_path)
        save_spec(nn_builder.spec, model_path)
        mlmodel = MLModel(model_path)

        quantized_model = quantization_utils.quantize_weights(
            mlmodel, 16, "linear")

        q_nn_builder = NeuralNetworkBuilder(spec=quantized_model._spec)

        # fails since an FP16 model cannot be marked updatable
        with self.assertRaises(ValueError):
            q_nn_builder.make_updatable(["ip1", "ip2"])
Example No. 10
    def test_nn_partial_fp16_make_updatable_quantized_layer_fail(self):
        nn_builder = self.create_base_builder(is_updatable=False)
        model_path = os.path.join(self.model_dir, "updatable_creation.mlmodel")
        print(model_path)
        save_spec(nn_builder.spec, model_path)
        mlmodel = MLModel(model_path)

        selector = LayerSelector(layer_name='ip2')
        quantized_model = quantization_utils.quantize_weights(
            mlmodel, 16, "linear", selector=selector)

        q_nn_builder = NeuralNetworkBuilder(spec=quantized_model._spec)

        # fails since model has a layer with FP16 bias
        with self.assertRaises(ValueError):
            q_nn_builder.make_updatable(["ip2"])
Example No. 11
                "--model",
                required=True,
                help="path to trained model model")
args = vars(ap.parse_args())

print("[INFO] loading model...")
model = load_model(args["model"])

print("[INFO] converting model...")
mlmodel = ct.convert(model)
spec = mlmodel.get_spec()
ct.utils.rename_feature(spec, 'Identity', 'confidence')
ct.utils.rename_feature(spec, 'conv2d_input', 'image')
mlmodel = ct.models.MLModel(spec)

mlmodel.author = 'xRapid Group'
mlmodel.license = 'Private Use'
mlmodel.short_description = 'Classifies RDTs.'
mlmodel.version = '1.0.0'

mlmodel.input_description[
    'image'] = 'Image. Grayscale. Normalised. Shape: (1, 256, 256, 1). Type: float32.'
mlmodel.output_description[
    'confidence'] = '0=negative. 1=positive. Shape: (1). Type: float32.'

print('[INFO] quantizing model...')
mlmodel = quantization_utils.quantize_weights(mlmodel, nbits=8)

print('[INFO] saving model...')
mlmodel.save('covidNet.mlmodel')
Example No. 12
import coremltools
from coremltools.models.neural_network import quantization_utils

# Quantize each EMNIST model to 8-bit weights (linear mode) and save it.
# (On platforms where quantize_weights() returns a spec instead of an MLModel,
# coremltools.utils.save_spec() can be used for saving.)
for i in range(1, 6):
    model = coremltools.models.MLModel('emnist_model{}.mlmodel'.format(i))
    quantized_model = quantization_utils.quantize_weights(model, 8, "linear")
    quantized_model.save('emnist_model{}_FP8.mlmodel'.format(i))
    print('Done!')
Example No. 13
download_blob(SOURCE_BUCKET, SOURCE_MODEL_PATH, '/tmp/model.h5')
download_blob(SOURCE_BUCKET, SOURCE_LABELS_PATH, '/tmp/labels.txt')

# Convert h5 model to coreml
OUTPUT_NAME = ['Identity']
MODEL_LABELS = '/tmp/labels.txt'
model = tfcoreml.convert(
    '/tmp/model.h5',  # the Keras model downloaded above
    image_input_names=['input_1'],
    input_name_shape_dict={'input_1': (1, 224, 224, 3)},
    output_feature_names=OUTPUT_NAME,
    minimum_ios_deployment_target='13',
    red_bias=-1,
    green_bias=-1,
    blue_bias=-1,
    is_bgr=True,
    image_scale=2.0 / 255.0,
)
model.save('/tmp/model.mlmodel')

# Create quantised version of coreml model
model = coremltools.models.MLModel('/tmp/model.mlmodel')
quantized_model = quantize_weights(model, nbits=8, quantization_mode="linear")
quantized_model.save('/tmp/model_quant.mlmodel')

upload_blob(DESTINATION_BUCKET, '/tmp/model.mlmodel',
            DESTINATION_DIRECTORY + '/model.mlmodel')
upload_blob(DESTINATION_BUCKET, '/tmp/model_quant.mlmodel',
            DESTINATION_DIRECTORY + '/model_quant.mlmodel')
Example No. 14
import coremltools
from coremltools.models.neural_network.quantization_utils import quantize_weights
import sys

model_in = sys.argv[1]
names = model_in.split(".")
model_out = names[0] + "_quantized." + names[1]

# On macOS, quantize_weights() returns an MLModel; on other operating systems
# (or older macOS versions) it returns a spec instead.
model = coremltools.models.MLModel(model_in)
n_bits = 8
mode = "kmeans"
quantized = quantize_weights(model, n_bits, mode)
try:
    # Non-macOS / old macOS: quantize_weights() returned a spec.
    coremltools.utils.save_spec(quantized, model_out)
except Exception as err:
    print("macOS version:", coremltools.models.utils.macos_version())
    print(err)
    # macOS: quantize_weights() returned an MLModel, so save it directly.
    quantized.save(model_out)
Example No. 15
# Convert the model
mlmodel = ct.convert(
    trace,
    inputs=[ct.ImageType(name="__input", shape=dummy_input.shape)],
)
spec = mlmodel.get_spec()

# Edit the spec
ct.utils.rename_feature(spec, '__input', 'image')
ct.utils.rename_feature(spec, '2577', 'output')
# rebuild the model from the edited spec
mlmodel = ct.models.MLModel(spec)
print(mlmodel)

from coremltools.models.neural_network import quantization_utils
from coremltools.models.neural_network.quantization_utils import AdvancedQuantizedLayerSelector

selector = AdvancedQuantizedLayerSelector(
    skip_layer_types=['batchnorm', 'bias', 'depthwiseConv'],
    minimum_conv_kernel_channels=4,
    minimum_conv_weight_count=4096)

quantized_model = quantization_utils.quantize_weights(mlmodel,
                                                      nbits=8,
                                                      quantization_mode='linear',
                                                      selector=selector)

quantized_file = './centernet.mlmodel'
quantized_model.save(quantized_file)
Example No. 16
    def _test_tf_model(
            self,
            graph,
            input_shapes,
            output_node_names,
            data_mode='random',
            input_refs=None,
            delta=1e-2,
            use_cpu_only=False,
            graph_optimizations="freeze",  # one of ["freeze", "convert_variables_to_constants", None]
            quantize_tf_model=False,
            quantize_mlmodel=False,
            quantize_config={}):
        """
        Common entry to testing routine.
        graph - defined TensorFlow graph.
        input_shapes -  dict str:shape for each input op (placeholder)
        output_node_names - output_node_names, a list of strings
        data_mode - auto-generated input vectors, can be 'random', 'zeros', 'ones', 'linear', etc.
        input_refs - a dictionary of reference input in tensorFlow axis order, each entry is str:shape.
            When using auto-generated input vectors, set input_refs to None.
        delta - maximum difference of normalized TensorFlow and CoreML outputs
        use_cpu_only - If True, instantiate and run CoreML model with CPU only
        graph_optimizations == "freeze" - Force TensorFlow graph to be frozen before converting.
        quantize_tf_model - If True, try to quantize TensorFlow model before converting
        quantize_mlmodel - If True, quantize the mlmodel after converting.
        quantize_config - Dictionary with test quantization parameters
        """

        # Some file processing
        model_dir = tempfile.mkdtemp()
        graph_def_file = os.path.join(model_dir, 'tf_graph.pb')
        checkpoint_file = os.path.join(model_dir, 'tf_model.ckpt')
        static_model_file = os.path.join(model_dir, 'tf_static.pb')
        coreml_model_file = os.path.join(model_dir, 'coreml_model.mlmodel')

        # add a saver
        tf.reset_default_graph()
        if graph_optimizations == "freeze":
            with graph.as_default() as g:
                saver = tf.train.Saver()

        if input_refs is None:
            feed_dict = {
                self._get_tf_tensor_name(graph, name): generate_data(input_shapes[name], data_mode)
                for name in input_shapes
            }
        else:
            feed_dict = {
                self._get_tf_tensor_name(graph, name): input_refs[name]
                for name in list(input_refs.keys())
            }

        with tf.Session(graph=graph) as sess:
            # initialize
            initializer_op = tf.global_variables_initializer()
            sess.run(initializer_op)
            # run the result
            fetches = [graph.get_operation_by_name(name).outputs[0] for name in output_node_names]
            result = sess.run(fetches, feed_dict=feed_dict)
            # save graph definition somewhere
            tf.train.write_graph(sess.graph, model_dir, graph_def_file, as_text=False)
            # save the weights if freezing is needed
            if not graph_optimizations:
                static_model_file = graph_def_file
            elif graph_optimizations == "freeze":
                saver.save(sess, checkpoint_file)
                self._simple_freeze(
                    input_graph=graph_def_file,
                    input_checkpoint=checkpoint_file,
                    output_graph=static_model_file,
                    output_node_names=",".join(output_node_names))
            else:
                output_graph_def = tf.graph_util.convert_variables_to_constants(
                    sess, graph.as_graph_def(), output_node_names)
                with tf.gfile.GFile(static_model_file, "wb") as f:
                    f.write(output_graph_def.SerializeToString())

        # if TF needs to be quantized, quantize the graph
        if quantize_tf_model:
            static_model_file = self._quantize_static_tf_model(
                model_dir, static_model_file, output_node_names)

        # convert to CoreML
        mlmodel = coremltools.converters.tensorflow.convert(
            static_model_file,
            inputs=input_shapes,
            outputs=output_node_names,
            use_cpu_only=use_cpu_only)

        # Quantize MLModel if needed
        if quantize_mlmodel:
            from coremltools.models.neural_network.quantization_utils import quantize_weights
            nbits = quantize_config['nbits']
            mode = quantize_config['mode']
            mlmodel = quantize_weights(mlmodel, nbits, quantization_mode=mode)

        if DEBUG:
            print('\n mlmodel description: \n')
            from coremltools.models.neural_network.printer import print_network_spec
            print_network_spec(mlmodel.get_spec(), style='coding')
            mlmodel.save(coreml_model_file)
            print('\n mlmodel saved at %s' % coreml_model_file)

        coreml_input_names = [str(x) for x in mlmodel.input_description]
        coreml_input_shapes = _parse_coreml_input_shapes(mlmodel)

        # Transpose input data as CoreML requires
        coreml_inputs = {}
        for name in coreml_input_names:
            tfop_name = _parse_coreml_name_to_tf(name)
            if tfop_name in input_shapes:
                coreml_inputs[name] = tf_transpose(
                    feed_dict[self._get_tf_tensor_name(graph, tfop_name)])
            else:
                coreml_inputs[name] = np.zeros(coreml_input_shapes[name])

        # Run predict in CoreML
        coreml_output = mlmodel.predict(coreml_inputs, useCPUOnly=use_cpu_only)

        for idx, out_name in enumerate(output_node_names):
            tf_out = result[idx]
            if len(tf_out.shape) == 0:
                tf_out = np.array([tf_out])

            tp = tf_out.flatten()
            if out_name in coreml_output:
                coreml_out = coreml_output[out_name]
            elif out_name+'__outvar__' in coreml_output:
                coreml_out = coreml_output[out_name+'__outvar__']
            else:
                self.assertTrue(False, 'CoreML output not found')

            cp = coreml_out.flatten()

            self.assertTrue(tf_out.shape == coreml_out.shape)
            for i in range(len(tp)):
                max_den = max(1.0, tp[i], cp[i])
                self.assertAlmostEqual(tp[i] / max_den, cp[i] / max_den, delta=delta)

        # Cleanup files - models on disk no longer useful
        if os.path.exists(model_dir):
            shutil.rmtree(model_dir)
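For illustration, a hypothetical test exercising this helper's quantization path might look like the sketch below; the toy graph, shapes, and node names are assumptions, not part of the original suite:

    def test_relu_quantized(self):
        # Hypothetical test: a tiny graph with no variables, so freezing is skipped.
        graph = tf.Graph()
        with graph.as_default():
            x = tf.placeholder(tf.float32, shape=[1, 8, 8, 3], name="input")
            tf.nn.relu(x, name="output")

        self._test_tf_model(
            graph,
            input_shapes={"input": [1, 8, 8, 3]},
            output_node_names=["output"],
            graph_optimizations=None,  # no variables, so nothing to freeze
            quantize_mlmodel=True,
            quantize_config={"nbits": 8, "mode": "linear"})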
Example No. 17
# Set feature descriptions (these show up as comments in XCode)
ctmodel.input_description["drawing"] = "Input drawing to be classified"
ctmodel.output_description["classLabel"] = "Most likely symbol"
ctmodel.output_description[
    "classLabelProbs"] = "Probability scores for each symbol"

# Set model author name
ctmodel.author = "Venkata S Govindarajan"

# Set the license of the model
ctmodel.license = "MIT License"

# Set a short description for the Xcode UI
ctmodel.short_description = ("Detects the most likely LaTeX mathematical "
                             "symbol corresponding to a drawing.")

# Set a version for the model
ctmodel.version = "0.95"

# Save model
ctmodel.save("deTeX.mlmodel")

# Quantisation to an FP16 model, which halves the size without (supposedly)
# affecting accuracy

ctmodel_fp16 = quantization_utils.quantize_weights(ctmodel, nbits=16)
ctmodel_fp16.save("deTeX16.mlmodel")

ctmodel_fp8 = quantization_utils.quantize_weights(ctmodel, nbits=8)
ctmodel_fp8.save("deTeX8.mlmodel")
Example No. 18
import sys
import coremltools as ct
from coremltools.models.neural_network import quantization_utils

if len(sys.argv) != 3:
    print("USAGE: %s <input_mlmodel> <output_mlmodel>" % sys.argv[0])
    sys.exit(1)

input_model_path = sys.argv[1]
output_model_path = sys.argv[2]

# coremltools 3 version:
#spec = coremltools.utils.load_spec(input_model_path)
#spec_fp16 = coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec)
#coremltools.utils.save_spec(spec_fp16, output_model_path)

# coremltools 4 version:
model = ct.models.MLModel(input_model_path)
model_fp16 = quantization_utils.quantize_weights(model, nbits=16)
model_fp16.save(output_model_path)
Example No. 19
# The mode argument should be one of:
#   linear
#   kmeans
#   dequantization
#
# The number of bits should be between 1 and 8 (16 is also accepted for half precision).

import sys
import coremltools as ct
from coremltools.models.neural_network import quantization_utils

if len(sys.argv) < 4:
    print("USAGE: %s <input_mlmodel> <output_mlmodel> <mode> <bits>" %
          sys.argv[0])
    sys.exit(1)

input_model_path = sys.argv[1]
output_model_path = sys.argv[2]
mode = sys.argv[3]
nbits = int(sys.argv[4]) if len(sys.argv) > 4 else 8

model = ct.models.MLModel(input_model_path)
quant_model = quantization_utils.quantize_weights(model, nbits, mode)
quant_model.save(output_model_path)
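As an optional hardening step (not part of the original script), the arguments could be checked against the values listed in the header comment before quantizing; a minimal sketch:

VALID_MODES = {"linear", "kmeans", "dequantization"}

def validate(mode, nbits):
    # Reject values the header comment does not document.
    if mode not in VALID_MODES:
        raise SystemExit("mode must be one of: " + ", ".join(sorted(VALID_MODES)))
    if nbits not in list(range(1, 9)) + [16]:
        raise SystemExit("bits must be between 1 and 8, or 16")

# e.g. call validate(mode, nbits) right before quantize_weights(...)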
Example No. 20
import torch
import sys
import numpy as np
from model import Net
import coremltools as ct
from coremltools.models.neural_network import quantization_utils

model_in = sys.argv[1]
label_count = sys.argv[2]

model = Net(output_label_count=int(label_count))
model.load_state_dict(torch.load(model_in))

model.cpu()  # convert model to cpu
model.eval()  # switch to eval mode

random_input = torch.rand(1, 1, 98, 40)
traced_model = torch.jit.trace(model, random_input, check_trace=False)

print("converting pymodl to coreml model")
converted_model = ct.convert(
    traced_model,  # convert using Unified Conversion API
    inputs=[ct.TensorType(shape=random_input.shape)])
print("convertion is completed saving to disk f{}")

# allowed values of nbits = 16, 8, 7, 6, ...., 1
quantized_model = quantization_utils.quantize_weights(converted_model, 8)
converted_model.save(model_in.replace(".pymodel", "") + ".mlmodel")
quantized_model.save(model_in.replace(".pymodel", "_quantized") + ".mlmodel")
Example No. 21
"""Convert the exported Keras model to Core ML and quantize the weights.

macOS is REQUIRED for quantization.
"""

import os

import coremltools as ct
import tensorflow as tf
from coremltools.models.neural_network import quantization_utils

if __name__ == "__main__":
    # Converted model will be exported here.
    export_dir = "./mlmodels"
    if not os.path.exists(export_dir):
        os.mkdir(export_dir)

    # Restore the model.
    model = tf.keras.models.load_model("./exported")

    # Do the conversion.
    mlmodel = ct.convert(model)
    mlmodel.save("./mlmodels/hrnetv2_fp32.mlmodel")

    # Quantization: FP16
    model_fp16 = quantization_utils.quantize_weights(mlmodel, nbits=16)
    model_fp16.save("./mlmodels/hrnetv2_fp16.mlmodel")

    # Quantization: INT8
    model_int8 = quantization_utils.quantize_weights(mlmodel, nbits=8)
    model_int8.save("./mlmodels/model_int8.mlmodel")