def convert_weights(model: str, output_dir: str = "weights",
                    aggressive: bool = False) -> None:
    """Extract weights from model, convert them into binary fixed point and
    save to file."""
    net = onnx.load(model)
    weights_dict = {}
    for init in net.graph.initializer:
        weights_dict[init.name] = numpy_helper.to_array(init)

    last_layer_name = ""
    for node in net.graph.node:
        if node.op_type == "QLinearConv":
            # only convolution layers contain weights;
            # QLinearConv inputs: [3] = kernel, [4] = weight scale, [8] = bias
            kernel = weights_dict[node.input[3]]
            bias = weights_dict[node.input[8]]

            # pad the layer name to a fixed width of 16 characters
            layer_name = node.input[3][:16].zfill(16)
            if last_layer_name and len(last_layer_name) != len(layer_name):
                raise InconsistencyError(
                    f"Layer names have different length. "
                    f"{len(last_layer_name)} != {len(layer_name)}. "
                    f"Padding to 16 chars failed.")
            last_layer_name = layer_name

            # derive the fixed point format from the weight scale
            int_bits = 8 - int(math.log2(weights_dict[node.input[4]]))
            frac_bits = int(math.log2(weights_dict[node.input[4]]))
            kernel = to_fixed_point_array(
                kernel, int_bits=int_bits, frac_bits=frac_bits,
                aggressive=aggressive)
            bias = to_fixed_point_array(
                bias, int_bits=int_bits, frac_bits=frac_bits,
                aggressive=aggressive)
            weights_to_files(kernel, bias, layer_name, output_dir)


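# Example usage (illustrative only; the model path is a placeholder for any
# ONNX export that contains QLinearConv nodes):
#
#     convert_weights("models/cnn_quant.onnx", output_dir="weights")
#
# This calls weights_to_files() once per QLinearConv layer, writing the fixed
# point kernel and bias of each layer into the output directory.

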
def analyze_and_quantize(original_weights, original_bias,
                         aggressive: bool = False) -> dict:
    """Analyze and quantize the weights."""
    # determine the fixed point format from the largest absolute value
    max_val = max(np.amax(original_weights), np.amax(original_bias))
    min_val = min(np.amin(original_weights), np.amin(original_bias))
    highest_val = max(abs(max_val), abs(min_val))
    int_width = get_integer_width(highest_val)
    print("weight quantization: ", int_width, 8 - int_width)
    print("stats: ", max_val, min_val, highest_val)

    # quantize the weights
    quantized_weights = to_fixed_point_array(
        original_weights, int_bits=int_width, frac_bits=8 - int_width,
        aggressive=aggressive)
    quantized_bias = to_fixed_point_array(
        original_bias, int_bits=int_width, frac_bits=8 - int_width,
        aggressive=aggressive)
    quantized_weights_int = v_to_fixedint(quantized_weights)
    quantized_bias_int = v_to_fixedint(quantized_bias)

    print("average error per weight:",
          np.mean(np.abs(original_weights - quantized_weights)))
    avg_val = np.mean(np.abs(quantized_weights))
    print("average absolute weight value:", avg_val)

    # print the weight stats (bias is omitted for now)
    count = {"total": quantized_weights.size}
    count["zeros"] = count["total"] - np.count_nonzero(quantized_weights)
    count["power_of_two"] = np.count_nonzero(
        v_is_power_of_two(quantized_weights))
    count["other"] = count["total"] - count["zeros"] - count["power_of_two"]
    print("total weights:", count["total"])
    print("zero weights:", count["zeros"], count["zeros"] / count["total"])
    print("power of two weights:", count["power_of_two"],
          count["power_of_two"] / count["total"])
    print("remaining weights:", count["other"], count["other"] / count["total"])
    if aggressive and count["other"]:
        print("WARNING: at aggressive quantization all weights should be "
              "0 or a power of two.")

    return {
        "weights": quantized_weights_int,
        "bias": quantized_bias_int,
        "quant": (int_width, 8 - int_width),
        "avg_val": avg_val,
    }


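# Worked example (illustrative numbers, assuming get_integer_width() counts
# the sign bit): for highest_val = 2.7 the magnitude needs two integer bits
# plus the sign, so int_width = 3 and the values are quantized to a signed
# Q3.5 format, i.e. 3 integer bits, 8 - 3 = 5 fractional bits and a
# resolution of 2 ** -5 = 0.03125.

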
def relu(array_in):
    """Rectified linear unit activation."""
    sample = array_in.item(0)
    # build a zero array in the same fixed point format as the input, so the
    # output of np.where keeps that format
    array_out = to_fixed_point_array(
        np.zeros(array_in.shape), format_inst=sample, signed=sample.is_signed)
    return np.where(array_in > 0, array_in, array_out)


def numpy_inference(onnx_model, input_):
    """Calculate the inference of a given input with a given model."""
    weights_dict = {}
    for init in onnx_model.graph.initializer:
        weights_dict[init.name] = numpy_helper.to_array(init)

    next_input = input_
    for node in onnx_model.graph.node:
        params = parse_param.parse_node_attributes(node)

        if node.op_type == "Conv":
            raise NotSupportedError(f"Layer {node.op_type} not supported.")
        if node.op_type == "QLinearConv":
            pad = parse_param.get_pad(params)
            if pad:
                next_input = cnn_reference.zero_pad(next_input, pad)
            ksize, stride = parse_param.get_kernel_params(params)

            # QLinearConv inputs: [3] = kernel, [4] = weight scale,
            # [6] = output scale, [8] = bias
            int_bits_weights = 8 - int(math.log2(weights_dict[node.input[4]]))
            frac_bits_weights = int(math.log2(weights_dict[node.input[4]]))
            weights = to_fixed_point_array(weights_dict[node.input[3]],
                                           int_bits=int_bits_weights,
                                           frac_bits=frac_bits_weights)
            bias = to_fixed_point_array(weights_dict[node.input[8]],
                                        int_bits=int_bits_weights,
                                        frac_bits=frac_bits_weights)
            bitwidth_out = (
                8 - int(math.log2(weights_dict[node.input[6]])),
                int(math.log2(weights_dict[node.input[6]])),
            )
            next_input = cnn_reference.conv(next_input, weights, bias,
                                            (ksize, stride), bitwidth_out)
        elif node.op_type == "MaxPool":
            ksize, stride = parse_param.get_kernel_params(params)
            next_input = cnn_reference.max_pool(next_input, ksize, stride)
        elif node.op_type == "GlobalAveragePool":
            next_input = cnn_reference.avg_pool(next_input)
        elif node.op_type == "Relu":
            next_input = cnn_reference.relu(next_input)
        elif node.op_type == "LeakyRelu":
            next_input = cnn_reference.leaky_relu(
                next_input, FpBinary(int_bits=0, frac_bits=3, value=0.125))
    return next_input


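# Example usage (a sketch; the model path, input shape and input fixed point
# format are placeholders):
#
#     model = onnx.load("models/cnn_quant.onnx")
#     image = to_fixed_point_array(np.random.rand(1, 3, 32, 32),
#                                  int_bits=1, frac_bits=7)
#     result = numpy_inference(model, image)
#
# The input has to be a fixed point array in NCHW layout, matching what the
# cnn_reference layers expect.

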
def zero_pad(array_in, size: int = 1):
    """Zero padding with same padding at each edge."""
    sample = array_in.item(0)
    # TODO: figure out why np.pad doesn't work
    # c = np.pad(array_in, ((0, 0), (0, 0), (size, size), (size, size)),
    #            "constant", constant_values=FpBinary(...))
    shape_out = (array_in.shape[0], array_in.shape[1],
                 array_in.shape[2] + 2 * size, array_in.shape[3] + 2 * size)
    array_out = to_fixed_point_array(
        np.zeros(shape_out), format_inst=sample, signed=sample.is_signed)
    array_out[:, :, size:-size, size:-size] = array_in
    return array_out


def avg_pool(array_in):
    """Global average pooling layer."""
    _, _, width, height = array_in.shape
    sample = array_in.item(0)
    # calculate the reciprocal for the average manually, because otherwise the
    # factor would be too different
    reciprocal = to_fixed_point_array(
        np.array(1. / (width * height)), int_bits=1, frac_bits=16,
        signed=False)
    array_out = np.sum(np.sum(array_in, axis=2), axis=2) * reciprocal

    # TODO: replace for loop
    # resize every averaged value back to the input format, saturating on
    # overflow and rounding to nearest even
    for value in np.nditer(array_out, flags=["refs_ok"]):
        value.item().resize(
            sample.format, OverflowEnum.sat, RoundingEnum.near_even)
    return array_out