def quantize_model(output_graph_file, test_data, input_holder, output):
    """Quantize a frozen TensorFlow model with the amct tool, in place.

    The frozen graph stored at ``output_graph_file`` is loaded, instrumented
    by amct, calibrated with one batch and saved as a quantized model. The
    quantized ``.pb`` then overwrites ``output_graph_file``. The intermediate
    files ``config.json``, ``record.txt`` and ``tf_model_quantized.pb`` are
    written next to ``output_graph_file``.

    :param output_graph_file: path of the frozen ``.pb`` model; it is
        replaced by the quantized model on success.
    :param test_data: calibration data source, forwarded to ``load_image``.
    :param input_holder: object whose ``name`` identifies the input tensor
        and whose ``shape`` is forwarded to ``load_image``.
    :param output: object whose ``name`` identifies the output tensor
        (``<op_name>:<index>`` form, as required by ``get_tensor_by_name``).
    :raises OSError: if the quantized model cannot be copied over
        ``output_graph_file``.
    """
    # Local import: the copy step is the only user of shutil in this block.
    import shutil

    batch = load_image(test_data, input_holder.shape)
    input_name = input_holder.name
    output_name = output.name

    with tf.io.gfile.GFile(output_graph_file, mode='rb') as model:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(model.read())

    # Work in a private graph so the caller's default graph is not touched.
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')
        input_tensor = graph.get_tensor_by_name(input_name)
        output_tensor = graph.get_tensor_by_name(output_name)

        base_dir = os.path.dirname(output_graph_file)
        config_path = os.path.join(base_dir, 'config.json')
        amct.create_quant_config(config_file=config_path,
                                 graph=graph,
                                 skip_layers=[],
                                 batch_num=1)

        record_path = os.path.join(base_dir, 'record.txt')
        amct.quantize_model(graph=graph,
                            config_file=config_path,
                            record_file=record_path)

        # Calibration: initialize the variables added by amct, then run one
        # batch through the instrumented graph.
        with tf.compat.v1.Session(graph=graph) as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            sess.run(output_tensor, feed_dict={input_tensor: batch})

    save_path = os.path.join(base_dir, 'tf_model')
    # amct expects the operation name, i.e. the tensor name without its
    # ':<index>' suffix. Splitting on the last colon also handles output
    # indices with more than one digit.
    output_op_name = output_name.rsplit(':', 1)[0]
    amct.save_model(pb_model=output_graph_file,
                    outputs=[output_op_name],
                    record_file=record_path,
                    save_path=save_path)

    # Copy through the standard library instead of spawning a shell: it works
    # for paths containing spaces or shell metacharacters, is portable, and
    # raises on failure so success is never logged for a failed copy.
    shutil.copyfile('{}_quantized.pb'.format(save_path), output_graph_file)
    logging.info('amct quantinize successfully.')
def quantize_all_layers(model_path, input_tensor_name, output_tensor_name):
    """Quantize every layer of the network stored at ``model_path``.

    The config file, record file and quantized model are written under
    ``OUTPUTS``; calibration images are read from ``PATH/data/calibration``.

    :param model_path: path of the original ``.pb`` model.
    :param input_tensor_name: forwarded to ``load_model`` to locate the input.
    :param output_tensor_name: forwarded to ``load_model`` to locate the
        output.
    """
    # 1. Load the original model together with its input/output tensors.
    graph, input_tensor, output_tensor = load_model(
        model_path, input_tensor_name, output_tensor_name)

    # 2. Write the quantization config. A simplified config definition is
    #    passed along only when one was given on the command line.
    config_path = os.path.join(OUTPUTS, 'config.json')
    config_options = {
        'config_file': config_path,
        'graph': graph,
        'skip_layers': [],
        'batch_num': 1,
    }
    if ARGS.cfg_define is not None:
        config_options['config_defination'] = ARGS.cfg_define
    amct.create_quant_config(**config_options)

    # 3. Instrument the graph for quantization.
    record_path = os.path.join(OUTPUTS, 'record.txt')
    amct.quantize_model(graph=graph,
                        config_file=config_path,
                        record_file=record_path)

    # 4. Calibrate with one batch, then save the quantized model.
    batch = load_image(os.path.join(PATH, 'data/calibration'))
    with tf.compat.v1.Session() as session:
        session.run(tf.compat.v1.global_variables_initializer())
        session.run(output_tensor, feed_dict={input_tensor: batch})

    quantized_prefix = os.path.join(OUTPUTS, 'mobilenet_v2_all_layers')
    amct.save_model(pb_model=model_path,
                    outputs=['MobilenetV2/Predictions/Reshape_1'],
                    record_file=record_path,
                    save_path=quantized_prefix)
def _merge_yolo_outputs(xy, confidence, probability):
    """Flatten the three YOLOv3 outputs and join them column-wise.

    Each row of the result holds 4 columns taken from ``xy``, 1 column from
    ``confidence`` and 80 columns from ``probability``, in that order.
    """
    return np.concatenate(
        (xy.reshape([-1, 4]),
         confidence.reshape([-1, 1]),
         probability.reshape([-1, 80])),
        axis=-1)


def main():  # pylint: disable=too-many-statements, too-many-locals, not-context-manager
    """Quantize the YOLOv3 sample model and compare it with the original.

    Before running this script, check that the following files exist
    under ``PATH``:
        model/yolov3_tensorflow_1.5.pb,
        data/COCO_labels.txt,
        data/detection.jpg,
        data/calibration.jpg

    The annotated detections of the original and the quantized model are
    saved as ``origin.png`` and ``quantize.png`` in the working directory.

    :return: None
    """
    # Step one, load the trained model.
    # This sample uses YOLOv3 trained with the COCO dataset and saved as
    # 'yolov3_tensorflow_1.5.pb'. Therefore the COCO labels and a test image
    # are loaded to do inference.
    model_path = os.path.join(PATH, 'model/yolov3_tensorflow_1.5.pb')
    with tf.io.gfile.GFile(model_path, mode='rb') as model:
        graph_def = tf.compat.v1.GraphDef()
        graph_def.ParseFromString(model.read())

    tf.import_graph_def(graph_def, name='')
    graph = tf.compat.v1.get_default_graph()
    input_tensor = graph.get_tensor_by_name(INPUT_NAME + ':0')
    xy_tensor = graph.get_tensor_by_name(XY_NAME + ':0')
    confidence_tensor = graph.get_tensor_by_name(CONF_NAME + ':0')
    probability_tensor = graph.get_tensor_by_name(PROB_NAME + ':0')

    label_path = os.path.join(PATH, 'data/COCO_labels.txt')
    with open(label_path) as label_file:
        # Strip only the line terminator. Slicing off the last character
        # would truncate the final label when the file does not end with a
        # newline.
        labels = [line.rstrip('\n') for line in label_file]

    image_path = os.path.join(PATH, 'data/detection.jpg')
    image_input = preprocessing(image_path)
    image_input = image_input.reshape([1, SIDE, SIDE, 3])

    with tf.compat.v1.Session() as session:
        xy, confidence, probability = session.run(
            [xy_tensor, confidence_tensor, probability_tensor],
            feed_dict={input_tensor: image_input})
    bbox_origin = _merge_yolo_outputs(xy, confidence, probability)

    # Step two, generate the quantization config file.
    # The function 'create_quant_config' generates a config file that
    # describes how to quantize the model in graph. The config file is saved
    # in JSON format, and you can edit it to configure the quantization
    # parameters of each layer (FC, CONV and DW are supported).
    config_path = os.path.join(OUTPUTS, 'config.json')
    cfg_define = os.path.join(PATH, 'src/yolo_quant.cfg')
    amct.create_quant_config(config_file=config_path,
                             graph=graph,
                             config_defination=cfg_define)

    # Step three, quantize the model.
    # The function 'quantize_model' quantizes the model in graph according
    # to the config file.
    record_path = os.path.join(OUTPUTS, 'record.txt')
    amct.quantize_model(graph=graph,
                        config_file=config_path,
                        record_file=record_path)

    # Step four, calibrate and save the quantized model.
    # The quantization parameters require one or more batches of data to do
    # inference and find the optimal values. This process is referred to as
    # calibration. Here one picture is used for calibration. Be sure to
    # initialize the quantization parameters first. If your model has
    # variables, you must reload the checkpoint before inference. After
    # calibration, you can save the quantized model from the original model
    # and the record file. The quantized model can be used for simulation
    # tests on CPU/GPU to evaluate the accuracy of the quantized model, and
    # it can also be used by ATC to generate the model running on the
    # Ascend AI Processor.
    calibration_path = os.path.join(PATH, 'data/calibration.jpg')
    image_calibration = preprocessing(calibration_path)
    image_calibration = image_calibration.reshape([1, SIDE, SIDE, 3])

    with tf.compat.v1.Session() as session:
        session.run(tf.compat.v1.global_variables_initializer())
        session.run([xy_tensor, confidence_tensor, probability_tensor],
                    feed_dict={input_tensor: image_calibration})

    amct.save_model(pb_model=model_path,
                    outputs=[XY_NAME, CONF_NAME, PROB_NAME],
                    record_file=record_path,
                    save_path=os.path.join(OUTPUTS, 'yolo_v3'))

    # Step five, reload and test the quantized model for 'Fakequant'.
    model_path = os.path.join(OUTPUTS, 'yolo_v3_quantized.pb')
    with tf.io.gfile.GFile(name=model_path, mode='rb') as model:
        graph_def_reload = tf.compat.v1.GraphDef()
        graph_def_reload.ParseFromString(model.read())

    # Import into a fresh graph so the node names do not clash with the
    # original model that already lives in the default graph.
    graph_reload = tf.Graph()
    with graph_reload.as_default():
        tf.import_graph_def(graph_def=graph_def_reload, name='')
    input_tensor = graph_reload.get_tensor_by_name(INPUT_NAME + ':0')
    xy_tensor = graph_reload.get_tensor_by_name(XY_NAME + ':0')
    confidence_tensor = graph_reload.get_tensor_by_name(CONF_NAME + ':0')
    probability_tensor = graph_reload.get_tensor_by_name(PROB_NAME + ':0')

    with tf.compat.v1.Session(graph=graph_reload) as session:
        xy, confidence, probability = session.run(
            [xy_tensor, confidence_tensor, probability_tensor],
            feed_dict={input_tensor: image_input})
    bbox_fakequant = _merge_yolo_outputs(xy, confidence, probability)

    # Draw and save the detections of both models for a visual comparison.
    bounds, classes, scores = postprocessing(bbox_origin, image_path)
    origin_image = annotate(image_path, bounds, classes, scores, labels)
    origin_image.save('origin.png', 'png')
    origin_image.show('origin')
    print('origin.png save successfully!')

    bounds, classes, scores = postprocessing(bbox_fakequant, image_path)
    quantize_image = annotate(image_path, bounds, classes, scores, labels)
    quantize_image.save('quantize.png', 'png')
    quantize_image.show('quantize')
    print('quantize.png save successfully!')
def main():  # pylint: disable=too-many-locals, not-context-manager
    """Quantize the MobileNetV2 sample and compare predictions.

    The script reads, relative to the current working directory:
        ./model/mobilenetv2_tf.pb,
        ./data/classification.jpg,
        ./data/calibration (folder of calibration images; the sample
        describes it as containing 32 images)

    It prints the top category index and probability of the original and of
    the quantized model.

    :return: None
    """
    # Step one: run the original model to get the reference prediction.
    model_path = os.path.realpath('./model/mobilenetv2_tf.pb')
    with tf.io.gfile.GFile(model_path, mode='rb') as pb_file:
        original_def = tf.compat.v1.GraphDef()
        original_def.ParseFromString(pb_file.read())

    tf.import_graph_def(original_def, name='')
    graph = tf.compat.v1.get_default_graph()
    input_tensor = graph.get_tensor_by_name(INPUT_NAME + ':0')
    output_tensor = graph.get_tensor_by_name(OUTPUT_NAME + ':0')

    image_path = os.path.realpath('./data/classification.jpg')
    resized = Image.open(image_path).resize([SIDE, SIDE])
    # Scale the pixel values by 1/128 and shift them down by 1.
    pixels = np.array(resized).astype(np.float32) / 128 - 1
    image_test = pixels.reshape([1, SIDE, SIDE, 3])

    with tf.compat.v1.Session() as session:
        origin_prediction = session.run(
            output_tensor, feed_dict={input_tensor: image_test})

    # Step two: write the quantization config file.
    # 'create_quant_config' emits a JSON file describing how each layer of
    # the graph is quantized; it may be edited by hand to tune the
    # per-layer parameters (FC, CONV and DW layers are supported).
    config_path = os.path.join(OUTPUTS, 'config.json')
    amct.create_quant_config(config_file=config_path,
                             graph=graph,
                             skip_layers=[],
                             batch_num=1)

    # Step three: let 'quantize_model' rewrite the graph as described by
    # the config file.
    record_path = os.path.join(OUTPUTS, 'record.txt')
    amct.quantize_model(graph=graph,
                        config_file=config_path,
                        record_file=record_path)

    # Step four: calibrate, then save the quantized model.
    # Calibration runs one or more batches through the instrumented graph
    # so the quantization parameters can be determined. The quantization
    # parameters must be initialized first; a model with variables needs
    # its checkpoint reloaded before inference. Afterwards the quantized
    # model is produced from the original model plus the record file. It
    # can be evaluated on CPU/GPU and also be fed to ATC to build a model
    # for the Ascend AI Processor.
    calibration_dir = os.path.realpath('./data/calibration')
    batch = load_image(calibration_dir)

    with tf.compat.v1.Session() as session:
        session.run(tf.compat.v1.global_variables_initializer())
        session.run(output_tensor, feed_dict={input_tensor: batch})

    quantized_prefix = os.path.join(OUTPUTS, 'mobilenet_v2')
    amct.save_model(pb_model=model_path,
                    outputs=[OUTPUT_NAME],
                    record_file=record_path,
                    save_path=quantized_prefix)

    # Step five: reload the quantized model and run the 'Fakequant' test.
    model_path = os.path.join(OUTPUTS, 'mobilenet_v2_quantized.pb')
    with tf.io.gfile.GFile(name=model_path, mode='rb') as pb_file:
        graph_def_reload = tf.compat.v1.GraphDef()
        graph_def_reload.ParseFromString(pb_file.read())

    graph_reload = tf.Graph()
    with graph_reload.as_default():
        tf.import_graph_def(graph_def=graph_def_reload, name='')
    input_tensor = graph_reload.get_tensor_by_name(INPUT_NAME + ':0')
    output_tensor = graph_reload.get_tensor_by_name(OUTPUT_NAME + ':0')

    with tf.compat.v1.Session(graph=graph_reload) as session:
        fakequant_prediction = session.run(
            output_tensor, feed_dict={input_tensor: image_test})

    # Report both predictions side by side.
    print('Origin Model Prediction:\n',
          '\tcategory index: %d\n' % origin_prediction.argmax(),
          '\tcategory prob: %.3f\n' % round(origin_prediction.max(), 3),
          end='')
    print('Quantized Model Prediction:\n',
          '\tcategory index: %d\n' % fakequant_prediction.argmax(),
          '\tcategory prob: %.3f\n' % round(fakequant_prediction.max(), 3),
          end='')
def main():  # pylint: disable=R0914
    """Quantize the ResNet-50 v1 sample and compare predictions.

    Before running this script, check that the following files exist
    under ``PATH``:
        model/resnet_v1_50.pb,
        data/classification.jpg,
        data/calibration (folder of calibration images; the sample
        describes it as containing 32 images)

    The quantization config definition is read from
    ``./src/nuq_conf/nuq_quant.cfg``. The top category index and probability
    of the original and of the quantized model are printed.

    :return: None
    """
    model_file = os.path.join(PATH, 'model/resnet_v1_50.pb')
    load_graph(model_file)
    # The TF1-style API is reached through tf.compat.v1 so the script also
    # runs where the bare tf.Session / tf.GraphDef aliases are not available
    # (TensorFlow 2.x), matching the other samples in this file.
    graph = tf.compat.v1.get_default_graph()
    input_tensor = graph.get_tensor_by_name('input:0')
    output_tensor = graph.get_tensor_by_name('Reshape_1:0')

    image_path = os.path.join(PATH, 'data/classification.jpg')
    image_test = Image.open(image_path).resize([SIDE, SIDE])
    # Scale the pixel values by 1/128 and shift them down by 1.
    image_test = np.array(image_test).astype(np.float32) / 128 - 1
    image_test = image_test.reshape([1, SIDE, SIDE, 3])

    print('inference with origin pb********************')
    with tf.compat.v1.Session() as session:
        origin_prediction = session.run(output_tensor,
                                        feed_dict={input_tensor: image_test})

    config_file = os.path.join(OUTPUTS, 'config.json')
    record_file = os.path.join(OUTPUTS, 'record.txt')
    # NOTE(review): this path is relative to the current working directory,
    # unlike the other inputs which are resolved against PATH - confirm that
    # this is intended.
    amct.create_quant_config(config_file, graph, batch_num=1,
                             config_defination='./src/nuq_conf/nuq_quant.cfg')
    amct.quantize_model(graph, config_file, record_file)

    # Calibration: initialize the variables, then run one batch through the
    # instrumented graph.
    calibration_path = os.path.join(PATH, 'data/calibration')
    batch = load_image(calibration_path)

    with tf.compat.v1.Session() as session:
        session.run(tf.compat.v1.global_variables_initializer())
        session.run(output_tensor, feed_dict={input_tensor: batch})

    amct.save_model(model_file, ['Reshape_1'], record_file,
                    os.path.join(OUTPUTS, 'resnet-50_v1'))

    # Reload and test the quantized model for 'Fakequant'.
    model_file = os.path.join(OUTPUTS, 'resnet-50_v1_quantized.pb')
    with tf.io.gfile.GFile(model_file, mode='rb') as model:
        graph_def_reload = tf.compat.v1.GraphDef()
        graph_def_reload.ParseFromString(model.read())

    # Import into a fresh graph so the node names do not clash with the
    # original model that already lives in the default graph.
    graph_reload = tf.Graph()
    with graph_reload.as_default():
        tf.import_graph_def(graph_def_reload, name='')

    print('inference with quantized pb====================')
    with tf.compat.v1.Session(graph=graph_reload) as session:
        fakequant_prediction = session.run('Reshape_1:0',
                                           feed_dict={'input:0': image_test})

    print('Origin Model Prediction:\n',
          '\tcategory index: %d\n' % origin_prediction.argmax(),
          '\tcategory prob: %.3f\n' % round(origin_prediction.max(), 3),
          end='')
    print('Quantized Model Prediction:\n',
          '\tcategory index: %d\n' % fakequant_prediction.argmax(),
          '\tcategory prob: %.3f\n' % round(fakequant_prediction.max(), 3),
          end='')