Example #1
def convert_weights(model: str, output_dir: str = "weights",
                    aggressive: bool = False) -> None:
    """Extract weights from model, convert them into binary fixed point and
    save to file."""
    net = onnx.load(model)

    weights_dict = {}
    for init in net.graph.initializer:
        weights_dict[init.name] = numpy_helper.to_array(init)

    last_layer_name = ""
    for node in net.graph.node:
        if node.op_type == "QLinearConv":
            # only convolution layers contain weights; for QLinearConv the
            # ONNX inputs are: 3 = weight tensor, 8 = bias tensor
            kernel = weights_dict[node.input[3]]
            bias = weights_dict[node.input[8]]

            layer_name = node.input[3][:16].zfill(16)
            if last_layer_name and len(last_layer_name) != len(layer_name):
                raise InconsistencyError(
                    f"Layer names have different length. "
                    f"{len(last_layer_name)} != {len(layer_name)}. "
                    f"Padding to 16 chars failed.")
            last_layer_name = layer_name

            # the weight scale (input 4) is assumed to be an exact power of
            # two, so its log2 fixes the int/frac split of the 8 bit format
            int_bits = 8 - int(math.log2(weights_dict[node.input[4]]))
            frac_bits = int(math.log2(weights_dict[node.input[4]]))

            kernel = to_fixed_point_array(
                kernel, int_bits=int_bits, frac_bits=frac_bits,
                aggressive=aggressive)
            bias = to_fixed_point_array(
                bias, int_bits=int_bits, frac_bits=frac_bits,
                aggressive=aggressive)
            weights_to_files(kernel, bias, layer_name, output_dir)
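
The helpers used above (to_fixed_point_array, weights_to_files, InconsistencyError) are project-specific and not part of these excerpts. Below is a minimal, hypothetical sketch of what an elementwise conversion like to_fixed_point_array could look like on top of the fpbinary package that the later examples rely on (FpBinary, OverflowEnum, RoundingEnum); the aggressive option, which presumably rounds weights to zero or powers of two, is accepted but ignored here. This is an illustration, not the project's actual implementation.

import numpy as np
from fpbinary import FpBinary

def _to_fp(value, int_bits=None, frac_bits=None, signed=True,
           format_inst=None, aggressive=False):
    # Hypothetical helper: wrap one float as an FpBinary value. The format is
    # given either explicitly (int_bits/frac_bits) or via a template instance.
    # The aggressive flag is accepted for API compatibility but not handled.
    if format_inst is not None:
        return FpBinary(format_inst=format_inst, signed=signed,
                        value=float(value))
    return FpBinary(int_bits=int_bits, frac_bits=frac_bits, signed=signed,
                    value=float(value))

# vectorized over numpy arrays; returns an object array of FpBinary values
to_fixed_point_array = np.vectorize(_to_fp, otypes=[object],
                                    excluded={"format_inst"})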
Example #2
def analyze_and_quantize(original_weights,
                         original_bias,
                         aggressive: bool = False) -> dict:
    """Analyze and quantize the weights."""
    max_val = max(np.amax(original_weights), np.amax(original_bias))
    min_val = min(np.amin(original_weights), np.amin(original_bias))
    highest_val = max(abs(max_val), abs(min_val))
    int_width = get_integer_width(highest_val)
    print("weight quantization: ", int_width, 8 - int_width)
    print("stats: ", max_val, min_val, highest_val)

    # quantize the weights
    quantized_weights = to_fixed_point_array(original_weights,
                                             int_bits=int_width,
                                             frac_bits=8 - int_width,
                                             aggressive=aggressive)
    quantized_bias = to_fixed_point_array(original_bias,
                                          int_bits=int_width,
                                          frac_bits=8 - int_width,
                                          aggressive=aggressive)
    quantized_weights_int = v_to_fixedint(quantized_weights)
    quantized_bias_int = v_to_fixedint(quantized_bias)
    print("average error per weight:",
          np.mean(np.abs(original_weights - quantized_weights)))
    avg_val = np.mean(np.abs(quantized_weights))
    print("average absolute weight value:", avg_val)

    # print the weight stats (bias is omitted for now)
    count = {"total": quantized_weights.size}
    count["zeros"] = count["total"] - np.count_nonzero(quantized_weights)
    count["power_of_two"] = np.count_nonzero(
        v_is_power_of_two(quantized_weights))
    count["other"] = count["total"] - count["zeros"] - count["power_of_two"]
    print("total weights:", count["total"])
    print("zero weights:", count["zeros"], count["zeros"] / count["total"])
    print("power of two weights:", count["power_of_two"],
          count["power_of_two"] / count["total"])
    print("left weights:", count["other"], count["other"] / count["total"])

    if aggressive and count["other"]:
        # aggressive mode is expected to leave only zeros and powers of two,
        # so any remaining "other" weights indicate a problem
        warnings.warn("With aggressive quantization all weights should be "
                      "0 or a power of two.")

    return {
        "weights": quantized_weights_int,
        "bias": quantized_bias_int,
        "quant": (int_width, 8 - int_width),
        "avg_val": avg_val,
    }
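
A hypothetical call, assuming the helpers the function relies on (get_integer_width, to_fixed_point_array, v_to_fixedint, v_is_power_of_two) are importable; the shapes and the random seed are placeholders.

import numpy as np

rng = np.random.default_rng(0)
kernel = rng.standard_normal((8, 3, 3, 3)).astype(np.float32)
bias = rng.standard_normal(8).astype(np.float32)

result = analyze_and_quantize(kernel, bias)
print(result["quant"])    # (int_bits, frac_bits) chosen for the 8 bit format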
Example #3
def relu(array_in):
    """Rectified linear unit activation."""
    sample = array_in.item(0)
    array_out = to_fixed_point_array(
        np.zeros(array_in.shape), format_inst=sample,
        signed=sample.is_signed)
    return np.where(array_in > 0, array_in, array_out)
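
A small, hypothetical check of the fixed point ReLU; to_fixed_point_array is assumed to behave like the sketch after Example #1.

import numpy as np

x = to_fixed_point_array(np.array([[-0.5, 0.25], [1.5, -1.0]]),
                         int_bits=4, frac_bits=4)
y = relu(x)
print([float(v) for v in y.flat])    # negatives are replaced by zeros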
Example #4
def numpy_inference(onnx_model, input_):
    """Calculate the inference of a given input with a given model."""
    weights_dict = {}
    for init in onnx_model.graph.initializer:
        weights_dict[init.name] = numpy_helper.to_array(init)

    next_input = input_
    for node in onnx_model.graph.node:
        params = parse_param.parse_node_attributes(node)

        if node.op_type == "Conv":
            raise NotSupportedError(f"Layer {node.op_type} not supported.")
        if node.op_type == "QLinearConv":
            pad = parse_param.get_pad(params)
            if pad:
                next_input = cnn_reference.zero_pad(next_input, pad)

            ksize, stride = parse_param.get_kernel_params(params)

            # the weight scale (input 4 of QLinearConv) is assumed to be a
            # power of two; its log2 fixes the weight/bias fixed point format
            int_bits_weights = 8 - int(math.log2(weights_dict[node.input[4]]))
            frac_bits_weights = int(math.log2(weights_dict[node.input[4]]))
            weights = to_fixed_point_array(weights_dict[node.input[3]],
                                           int_bits=int_bits_weights,
                                           frac_bits=frac_bits_weights)
            bias = to_fixed_point_array(weights_dict[node.input[8]],
                                        int_bits=int_bits_weights,
                                        frac_bits=frac_bits_weights)

            # the output format is derived from the output scale (input 6)
            bitwidth_out = (
                8 - int(math.log2(weights_dict[node.input[6]])),
                int(math.log2(weights_dict[node.input[6]])),
            )
            next_input = cnn_reference.conv(next_input, weights, bias,
                                            (ksize, stride), bitwidth_out)
        elif node.op_type == "MaxPool":
            ksize, stride = parse_param.get_kernel_params(params)
            next_input = cnn_reference.max_pool(next_input, ksize, stride)
        elif node.op_type == "GlobalAveragePool":
            next_input = cnn_reference.avg_pool(next_input)
        elif node.op_type == "Relu":
            next_input = cnn_reference.relu(next_input)
        elif node.op_type == "LeakyRelu":
            next_input = cnn_reference.leaky_relu(
                next_input, FpBinary(int_bits=0, frac_bits=3, value=0.125))
    return next_input
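
A hypothetical end-to-end run; the file name and the input shape are placeholders, and the model is assumed to contain only the quantized layer types handled above.

import numpy as np
import onnx

model = onnx.load("quantized_cnn.onnx")    # placeholder path
image = to_fixed_point_array(np.random.rand(1, 1, 28, 28),
                             int_bits=1, frac_bits=7, signed=False)
prediction = numpy_inference(model, image)
print(prediction.shape)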
Example #5
def zero_pad(array_in, size: int = 1):
    """Zero padding with same padding at each edge."""
    sample = array_in.item(0)
    # TODO: figure out why np.pad doesn't work
    # c = np.pad(array_in, ((0, 0), (0, 0), (size, size), (size, size)),
    #            "constant", constant_values=FpBinary(...))
    shape_out = (array_in.shape[0], array_in.shape[1],
                 array_in.shape[2] + 2*size, array_in.shape[3] + 2*size)
    array_out = to_fixed_point_array(
        np.zeros(shape_out), format_inst=sample, signed=sample.is_signed)
    array_out[:, :, size:-size, size:-size] = array_in
    return array_out
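
A quick, hypothetical check of the padding helper; to_fixed_point_array is again assumed to behave like the sketch after Example #1.

import numpy as np

small = to_fixed_point_array(np.ones((1, 1, 2, 2)), int_bits=4, frac_bits=4)
padded = zero_pad(small, size=1)
print(padded.shape)    # (1, 1, 4, 4), with fixed point zeros at the border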
Example #6
def avg_pool(array_in):
    """Global average pooling layer."""
    _, _, width, height = array_in.shape
    sample = array_in.item(0)

    # compute the reciprocal for the average manually in fixed point;
    # otherwise the scaling factor would deviate too much from the exact
    # value 1 / (width * height)
    reciprocal = to_fixed_point_array(
        np.array(1. / (width * height)), int_bits=1, frac_bits=16,
        signed=False)
    array_out = np.sum(np.sum(array_in, axis=2), axis=2) * reciprocal
    # TODO: replace for loop
    # resize rounds/saturates each element back to the input format in place
    for value in np.nditer(array_out, flags=["refs_ok"]):
        value.item().resize(
            sample.format, OverflowEnum.sat, RoundingEnum.near_even)
    return array_out
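
A hypothetical check of the global average pooling; the input is created unsigned here so it matches the unsigned reciprocal built inside avg_pool, and the shape is a placeholder.

import numpy as np

fmap = to_fixed_point_array(np.random.rand(1, 8, 4, 4),
                            int_bits=1, frac_bits=7, signed=False)
pooled = avg_pool(fmap)
print(pooled.shape)    # one averaged value per channel: (1, 8)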