def optimize_fp16_onnx_with_cast(input_onnx_path, optimized_onnx_path, epsilon):
    """Replace the graph with an fp32 LayerNormalization wrapped in Cast nodes.

    The model at ``input_onnx_path`` is loaded, every existing node is removed,
    and a five-node chain is inserted instead: three Cast nodes (``to=1``)
    feeding a single ``LayerNormalization`` node, followed by one Cast node
    (``to=10``) producing ``output``. The graph is then pruned and written to
    ``optimized_onnx_path``.

    Args:
        input_onnx_path: Path of the ONNX model to read.
        optimized_onnx_path: Path the rewritten model is saved to.
        epsilon: Value stored in the ``epsilon`` attribute of the
            LayerNormalization node.
    """
    model = OnnxModel(onnx.load(input_onnx_path))

    # Snapshot the current nodes before anything new is created.
    stale_nodes = model.nodes()

    make_node = onnx.helper.make_node

    # (source tensor, up-cast tensor, node name) for each LayerNormalization input.
    upcast_specs = (
        ("input", "fp32_input", "cast_input"),
        ("layer_norm.weight", "fp32_layer_norm.weight", "cast_weight"),
        ("layer_norm.bias", "fp32_layer_norm.bias", "cast_bias"),
    )
    # to=1 is TensorProto.FLOAT in the ONNX data-type enum.
    upcasts = [
        make_node("Cast", [source], [target], node_name, to=1)
        for source, target, node_name in upcast_specs
    ]

    # use fp32 epsilon
    layer_norm = make_node(
        "LayerNormalization",
        ["fp32_input", "fp32_layer_norm.weight", "fp32_layer_norm.bias"],
        ["fp32_output"],
        "layer_norm",
        epsilon=epsilon,
    )

    # to=10 is TensorProto.FLOAT16 in the ONNX data-type enum.
    downcast = make_node("Cast", ["fp32_output"], ["output"], "cast_output", to=10)

    model.remove_nodes(stale_nodes)
    model.add_nodes([*upcasts, layer_norm, downcast])
    model.prune_graph()
    model.save_model_to_file(optimized_onnx_path)
def optimize_fp16_onnx_no_cast(input_onnx_path, optimized_onnx_path, epsilon):
    """Replace the whole graph with a single LayerNormalization node.

    The model at ``input_onnx_path`` is loaded, every existing node is removed,
    and one ``LayerNormalization`` node reading ``input``,
    ``layer_norm.weight`` and ``layer_norm.bias`` and writing ``output`` is
    added. The graph is then pruned and written to ``optimized_onnx_path``.

    Args:
        input_onnx_path: Path of the ONNX model to read.
        optimized_onnx_path: Path the rewritten model is saved to.
        epsilon: Value stored in the ``epsilon`` attribute of the
            LayerNormalization node.
    """
    model = OnnxModel(onnx.load(input_onnx_path))

    # Snapshot the current nodes before the replacement is created.
    stale_nodes = model.nodes()

    fused_layer_norm = onnx.helper.make_node(
        "LayerNormalization",
        inputs=["input", "layer_norm.weight", "layer_norm.bias"],
        outputs=["output"],
        name="layer_norm",
        epsilon=epsilon,
    )

    model.remove_nodes(stale_nodes)
    model.add_node(fused_layer_norm)
    model.prune_graph()
    model.save_model_to_file(optimized_onnx_path)
def optimize_fp16_onnx_no_cast(input_onnx_path, optimized_onnx_path, epsilon):
    """Replace the graph with a single LayerNormalization node (no casts).

    The model at ``input_onnx_path`` is loaded and the names of its weight and
    bias tensors are looked up with ``get_weight`` / ``get_bias``. Every node
    is then removed except those whose first output is the weight or the bias,
    and one ``LayerNormalization`` node reading ``input`` plus those two
    tensors and writing ``output`` is added. The graph is pruned and written
    to ``optimized_onnx_path``.

    Args:
        input_onnx_path: Path of the ONNX model to read.
        optimized_onnx_path: Path the rewritten model is saved to.
        epsilon: Value stored in the ``epsilon`` attribute of the
            LayerNormalization node.
    """
    m = onnx.load(input_onnx_path)
    onnx_model = OnnxModel(m)

    weight_name = get_weight(onnx_model)
    bias_name = get_bias(onnx_model)

    # Keep any node that produces the weight or bias tensor: the new
    # LayerNormalization node consumes those tensors, so removing their
    # producers would leave it with dangling inputs. (Previously this filtered
    # list was immediately overwritten with *all* nodes, making the filter
    # dead code.)
    kept_outputs = (weight_name, bias_name)
    nodes_to_remove = [n for n in onnx_model.nodes() if n.output[0] not in kept_outputs]

    node_to_add = onnx.helper.make_node(
        "LayerNormalization",
        ["input", weight_name, bias_name],
        ["output"],
        "layer_norm",
        epsilon=epsilon,
    )

    onnx_model.remove_nodes(nodes_to_remove)
    onnx_model.add_node(node_to_add)
    onnx_model.prune_graph()
    onnx_model.save_model_to_file(optimized_onnx_path)
def optimize_fp16_onnx_with_cast(input_onnx_path, optimized_onnx_path, epsilon):
    """Replace the graph with an fp32 LayerNormalization wrapped in Cast nodes.

    The model at ``input_onnx_path`` is loaded and the names of its weight and
    bias tensors are looked up with ``get_weight`` / ``get_bias``. Every node
    is removed except those whose first output is the weight or the bias, and
    a five-node chain is inserted: three Cast nodes (``to=1``) feeding a single
    ``LayerNormalization`` node, followed by one Cast node (``to=10``)
    producing ``output``. The graph is then pruned and written to
    ``optimized_onnx_path``.

    Args:
        input_onnx_path: Path of the ONNX model to read.
        optimized_onnx_path: Path the rewritten model is saved to.
        epsilon: Value stored in the ``epsilon`` attribute of the
            LayerNormalization node.
    """
    model = OnnxModel(onnx.load(input_onnx_path))

    weight_name = get_weight(model)
    bias_name = get_bias(model)

    # Nodes producing the weight/bias tensors stay; everything else goes.
    preserved_outputs = (weight_name, bias_name)
    stale_nodes = [
        node for node in model.nodes() if node.output[0] not in preserved_outputs
    ]

    make_node = onnx.helper.make_node

    # (source tensor, up-cast tensor, node name) for each LayerNormalization input.
    upcast_specs = (
        ("input", "fp32_input", "cast_input"),
        (weight_name, "fp32_layer_norm.weight", "cast_weight"),
        (bias_name, "fp32_layer_norm.bias", "cast_bias"),
    )
    # to=1 is TensorProto.FLOAT in the ONNX data-type enum.
    upcasts = [
        make_node("Cast", [source], [target], node_name, to=1)
        for source, target, node_name in upcast_specs
    ]

    # use fp32 epsilon
    layer_norm = make_node(
        "LayerNormalization",
        ["fp32_input", "fp32_layer_norm.weight", "fp32_layer_norm.bias"],
        ["fp32_output"],
        "layer_norm",
        epsilon=epsilon,
    )

    # to=10 is TensorProto.FLOAT16 in the ONNX data-type enum.
    downcast = make_node("Cast", ["fp32_output"], ["output"], "cast_output", to=10)

    model.remove_nodes(stale_nodes)
    model.add_nodes([*upcasts, layer_norm, downcast])
    model.prune_graph()
    model.save_model_to_file(optimized_onnx_path)