def test_quantize(net, data, epsilon=EPSILON):
    """Compare F.quantize against the exported reference activations of every STE quantizer."""
    x_list = [
        data["input"][0, :, :],
        data["layer1_bn_out"][0, :, :, :],
        data["layer2_pool_out"][0, :, 0, :],
        data["layer3_conv_out"][0, :, 0, :],
        data["layer4_pool_out"][0, :, 0, :]
    ]
    y_exp_list = [
        data["input_quant"][0, 0, :, :],
        data["layer1_activ"][0, :, :, :],
        data["layer2_activ"][0, :, 0, :],
        data["layer3_activ"][0, :, 0, :],
        data["layer4_activ"][0, :, 0, :]
    ]
    scale_list = [
        convert.ste_quant(net, "quant1"),
        convert.ste_quant(net, "quant2"),
        convert.ste_quant(net, "quant3"),
        convert.ste_quant(net, "quant4"),
        convert.ste_quant(net, "quant5")
    ]
    casename_list = ["input", "layer1", "layer2", "layer3", "layer4"]

    ret = {}
    for casename, x, y_exp, scale in zip(casename_list, x_list, y_exp_list, scale_list):
        y = F.quantize(x, scale)
        l1_error = np.abs(y - y_exp).mean()
        success = l1_error < epsilon
        ret[casename] = {"result": success, "l1_error": l1_error}
    return ret

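# Minimal driver sketch for the test above. The file names are hypothetical;
# `net` is assumed to be the exported parameter dictionary of the quantized
# model and `data` the reference activations recorded during export.
if __name__ == "__main__":
    net = np.load("net.npz")
    data = np.load("verification.npz")
    for casename, res in test_quantize(net, data).items():
        status = "OK" if res["result"] else "FAIL"
        print(f"{casename}: {status} (L1 error = {res['l1_error']:.2e})")
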
def __init__(self, net, T, F2, reorder_bn=True, clip_balanced=True, **params):
    self.name = "Layer 4: Point Convolution + Batch Norm + ReLU + Pooling"
    self.T = T
    self.F2 = F2
    self.input_shape = (F2, T // 8)
    self.output_shape = (F2, T // 64)
    self.clip_balanced = clip_balanced
    self.reorder_bn = reorder_bn

    # fetch weights
    self.weights, self.weight_scale = convert.inq_conv2d(net, "sep_conv2")
    assert self.weights.shape == (self.F2, self.F2, 1, 1)
    self.weights = np.reshape(self.weights, (self.F2, self.F2))

    # fetch batch norm offset and scale
    self.input_scale = convert.ste_quant(net, "quant4")
    self.output_scale = convert.ste_quant(net, "quant5")
    self.bn_scale, self.bn_offset = convert.batch_norm(net, "batch_norm3")
    self.factor, self.bias = convert.div_factor_batch_norm(
        self.input_scale, self.weight_scale, self.output_scale,
        self.bn_scale, self.bn_offset, pool=8)

def __init__(self, net, T, F2, N, clip_balanced=True, **params):
    self.name = "Layer 5: Linear Layer"
    self.T = T
    self.F2 = F2
    self.N = N
    self.flatten_dim = self.F2 * (self.T // 64)
    self.input_shape = (F2, T // 64)
    self.output_shape = (N, )
    self.clip_balanced = clip_balanced

    # fetch weights
    self.weights, self.bias, self.weight_scale = convert.inq_linear(net, "fc")
    assert self.weights.shape == (self.N, self.flatten_dim)

    # fetch quantization scales
    self.input_scale = convert.ste_quant(net, "quant5")
    self.output_scale = convert.ste_quant(net, "quant6")
    self.factor = convert.div_factor(self.input_scale, self.weight_scale,
                                     self.output_scale)

def __init__(self, net, T, F2, clip_balanced=True, **params):
    self.name = "Layer 3: Convolution in Time"
    self.T = T
    self.F2 = F2
    self.input_shape = (F2, T // 8)
    self.output_shape = (F2, T // 8)
    self.clip_balanced = clip_balanced

    # fetch weights
    self.weights, self.weight_scale = convert.inq_conv2d(net, "sep_conv1")
    assert self.weights.shape == (self.F2, 1, 1, 16)
    self.weights = np.reshape(self.weights, (self.F2, 16))

    # fetch quantization scales (this layer has no batch norm)
    self.input_scale = convert.ste_quant(net, "quant3")
    self.output_scale = convert.ste_quant(net, "quant4")
    self.factor = convert.div_factor(self.input_scale, self.weight_scale,
                                     self.output_scale)

def __init__(self, net, C, T, F1, F2, clip_balanced=True, **params):
    # This variant fuses Layer 1 (conv1 + batch_norm1) and Layer 2
    # (conv2 + batch_norm2 + pooling) into a single step.
    self.name = "Layer 1: Convolution in Time + Batch Norm"
    self.C = C
    self.T = T
    self.F1 = F1
    self.F2 = F2
    self.input_shape = (C, T)
    self.output_shape = (F2, T // 8)
    self.clip_balanced = clip_balanced

    # fetch weights
    self.weights_1, self.weight_scale_1 = convert.inq_conv2d(net, "conv1")
    assert self.weights_1.shape == (self.F1, 1, 1, 64)
    self.weights_1 = np.reshape(self.weights_1, (self.F1, 64))
    self.weights_2, self.weight_scale_2 = convert.inq_conv2d(net, "conv2")
    assert self.weights_2.shape == (self.F2, 1, self.C, 1)
    self.weights_2 = np.reshape(self.weights_2, (self.F2, self.C))

    # fetch batch norm offset and scale
    self.input_scale = convert.ste_quant(net, "quant1")
    self.intermediate_scale = convert.ste_quant(net, "quant2")
    self.output_scale = convert.ste_quant(net, "quant3")
    self.bn_scale_1, self.bn_offset_1 = convert.batch_norm(net, "batch_norm1")
    self.bn_scale_2, self.bn_offset_2 = convert.batch_norm(net, "batch_norm2")
    self.factor_1, self.bias_1 = convert.div_factor_batch_norm(
        self.input_scale, self.weight_scale_1, self.intermediate_scale,
        self.bn_scale_1, self.bn_offset_1)
    self.factor_2, self.bias_2 = convert.div_factor_batch_norm(
        self.intermediate_scale, self.weight_scale_2, self.output_scale,
        self.bn_scale_2, self.bn_offset_2, pool=8)

    # fold the layer 1 rescaling into the layer 2 factors and biases
    # (hard-coded for F2 == 16 with D == 2: channel k of layer 2 reuses the
    # layer 1 factor of channel k // 2)
    for k in range(16):
        self.factor_2[k] *= self.factor_1[k // 2]
        self.bias_2[k] *= self.factor_1[k // 2]

def __init__(self, net, C, T, F1, clip_balanced=True, **params):
    self.name = "Layer 1: Convolution in Time + Batch Norm"
    self.C = C
    self.T = T
    self.F1 = F1
    self.input_shape = (C, T)
    self.output_shape = (F1, C, T)
    self.clip_balanced = clip_balanced

    # fetch weights
    self.weights, self.weight_scale = convert.inq_conv2d(net, "conv1")
    assert self.weights.shape == (self.F1, 1, 1, 64)
    self.weights = np.reshape(self.weights, (self.F1, 64))

    # fetch batch norm offset and scale
    self.input_scale = convert.ste_quant(net, "quant1")
    self.output_scale = convert.ste_quant(net, "quant2")
    self.bn_scale, self.bn_offset = convert.batch_norm(net, "batch_norm1")
    self.factor, self.bias = convert.div_factor_batch_norm(
        self.input_scale, self.weight_scale, self.output_scale,
        self.bn_scale, self.bn_offset)

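# Conceptual float reference for this layer (a sketch for clarity, not the
# repository's kernel): convolve every channel with each 64-tap filter along
# time, then apply the batch-norm affine transform. The 31/32 padding matches
# NET_L1_PAD_START/NET_L1_PAD_END used by the header generator; whether the
# stored weights follow correlation or convolution orientation is an
# assumption here. The quantized kernel instead folds input_scale,
# weight_scale, bn_scale/bn_offset and output_scale into the integer `factor`
# and `bias` computed above via convert.div_factor_batch_norm.
def _layer1_float_reference(x, weights, bn_scale, bn_offset):
    """x: [C, T] float input, weights: [F1, 64]; returns [F1, C, T]."""
    F1 = weights.shape[0]
    C, T = x.shape
    x_pad = np.pad(x, ((0, 0), (31, 32)))  # same-length output for a 64-tap kernel
    out = np.empty((F1, C, T))
    for f in range(F1):
        for c in range(C):
            # cross-correlation: undo np.convolve's implicit kernel flip
            out[f, c] = np.convolve(x_pad[c], weights[f][::-1], mode="valid")
        out[f] = bn_scale[f] * out[f] + bn_offset[f]
    return out
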
def __init__(self, net, C, T, F1, F2, reorder_bn=True, clip_balanced=True, **params):
    self.name = "Layer 2: Convolution in Space + Batch Norm + ReLU + Pooling"
    self.C = C
    self.T = T
    self.F1 = F1
    self.F2 = F2
    self.input_shape = (F1, C, T)
    self.output_shape = (F2, T // 8)
    self.clip_balanced = clip_balanced
    self.reorder_bn = reorder_bn

    # fetch weights
    self.weights, self.weight_scale = convert.inq_conv2d(net, "conv2")
    self.float_weights = np.reshape(net["conv2.weightFrozen"], (self.F2, self.C))
    self.float_weights = np.flip(self.float_weights, (-1))
    assert self.weights.shape == (self.F2, 1, self.C, 1)
    self.weights = np.reshape(self.weights, (self.F2, self.C))

    # fetch batch norm offset and scale
    self.input_scale = convert.ste_quant(net, "quant2")
    self.output_scale = convert.ste_quant(net, "quant3")
    self.bn_scale, self.bn_offset = convert.batch_norm(net, "batch_norm2")
    self.factor, self.bias = convert.div_factor_batch_norm(
        self.input_scale, self.weight_scale, self.output_scale,
        self.bn_scale, self.bn_offset, pool=8)

def gen_input_header(net, net_params, data, output_file):
    """Generate the C header containing the quantized (and padded) input data."""
    # only allow nets with 255 levels
    assert net_params["weightInqNumLevels"] == 255
    assert net_params["actSTENumLevels"] == 255

    # extract and prepare the input data
    scale_factor = convert.ste_quant(net, "quant1")
    input_quant = F.quantize_to_int(data, scale_factor)
    input_quant_align = align_array(input_quant)

    # also generate the padded input vector
    _, C, T = input_quant.shape
    T_pad = T + 63
    assert T_pad % 4 == 0
    input_pad = np.zeros((C, T_pad), dtype=int)  # np.int is deprecated; use the builtin
    input_pad[:, 31:31 + T] = input_quant[0]

    # generate the header file
    header = HeaderFile(output_file, "__INPUT_H__", with_c=True)
    header.add(HeaderArray("input_data", "int8_t", input_quant_align.ravel()))
    header.add(HeaderArray("input_data_pad", "int8_t", input_pad.ravel()))
    header.write()

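# Possible invocation (sketch; the file names are assumptions). As implied by
# the shape unpacking above, `data` is the float input of shape [1, C, T]
# before quantization:
#
#   net = np.load("net.npz")
#   with open("config.json", "r") as _f:
#       net_params = json.load(_f)["indiv"]["net"]["params"]
#   data = np.load("input_sample.npy")
#   gen_input_header(net, net_params, data, "input.h")
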
def gen_net_header(net_file, config_file, output_file):
    """Generate the C header with all network dimensions, factors, and quantized weights."""
    # load network
    net = np.load(net_file)

    # load configuration file
    with open(config_file, "r") as _f:
        config = json.load(_f)

    # we only need the network parameters
    net_params = config["indiv"]["net"]["params"]

    # only allow nets with 255 levels
    assert net_params["weightInqNumLevels"] == 255
    assert net_params["actSTENumLevels"] == 255
    assert net_params["N"] == 4

    # prepare params
    if net_params["F2"] is None:
        net_params["F2"] = net_params["F1"] * net_params["D"]
    # only allow F2 = F1 * D, with F2 divisible by 4 (checked after the default is filled in)
    assert net_params["F2"] == net_params["F1"] * net_params["D"]
    assert net_params["F2"] % 4 == 0

    # start the header file
    header = HeaderFile(output_file, "__NET_NET_H__", with_c=True)

    # add network dimensions
    header.add(HeaderComment("Network Dimensions", blank_line=False))
    header.add(HeaderConstant("NET_F1", net_params["F1"], blank_line=False))
    header.add(HeaderConstant("NET_F2", net_params["F2"], blank_line=False))
    header.add(HeaderConstant("NET_D", net_params["D"], blank_line=False))
    header.add(HeaderConstant("NET_C", net_params["C"], blank_line=False))
    header.add(HeaderConstant("NET_C_ALIGN", align_array_size(net_params["C"]),
                              blank_line=False))
    header.add(HeaderConstant("NET_T", net_params["T"], blank_line=False))
    header.add(HeaderConstant("NET_T_ALIGN", align_array_size(net_params["T"]),
                              blank_line=False))
    header.add(HeaderConstant("NET_T8", net_params["T"] // 8, blank_line=False))
    header.add(HeaderConstant("NET_T8_ALIGN", align_array_size(net_params["T"] // 8),
                              blank_line=False))
    header.add(HeaderConstant("NET_T64", (net_params["T"] // 8) // 8,
                              blank_line=False))
    header.add(HeaderConstant("NET_T64_ALIGN",
                              align_array_size((net_params["T"] // 8) // 8),
                              blank_line=False))
    header.add(HeaderConstant("NET_N", net_params["N"], blank_line=True))

    # layer 1
    input_scale = convert.ste_quant(net, "quant1")
    weight, weight_scale = convert.inq_conv2d(net, "conv1")
    weight = weight.reshape(net_params["F1"], 64)
    weight_reverse, _ = convert.inq_conv2d(net, "conv1", store_reversed=True)
    weight_reverse = weight_reverse.reshape(net_params["F1"], 64)
    bn_scale, bn_offset = convert.batch_norm(net, "batch_norm1")
    output_scale = convert.ste_quant(net, "quant2")
    factor, offset = convert.div_factor_batch_norm(input_scale, weight_scale,
                                                   output_scale, bn_scale, bn_offset)

    # pad the reversed weight vector by WEIGHT_L1_PAD entries (for alignment)
    if WEIGHT_L1_PAD > 0:
        weight_reverse_pad = np.zeros((net_params["F1"], 64 + WEIGHT_L1_PAD))
        weight_reverse_pad[:, :-WEIGHT_L1_PAD] = weight_reverse
    else:
        weight_reverse_pad = weight_reverse

    header.add(HeaderComment("Layer 1\n"
                             "=======\n"
                             "Convolution + BN\n\n"
                             "Input: [C, T]\n"
                             "Weight: [F1, 64]\n"
                             "Output: [F1, C, T]",
                             mode="/*"))
    header.add(HeaderConstant("NET_L1_PAD_START", 31))
    header.add(HeaderConstant("NET_L1_PAD_END", 32))
    header.add(HeaderConstant("NET_L1_PAD_INPUT_LEN", net_params["T"] + 31 + 32))
    header.add(HeaderConstant("NET_L1_PAD_INPUT_LEN_ALIGN",
                              align_array_size(net_params["T"] + 31 + 32)))
    header.add(HeaderArray("net_l1_factor", "int32_t", factor.ravel()))
    header.add(HeaderArray("net_l1_offset", "int32_t", offset.ravel()))
    header.add(HeaderConstant("NET_L1_WEIGHT_LEN", weight.shape[-1]))
    header.add(HeaderConstant("NET_L1_WEIGHT_LEN_ALIGN", weight_reverse_pad.shape[-1]))
    header.add(HeaderArray("net_l1_weight", "int8_t", weight.ravel()))
    header.add(HeaderArray("net_l1_weight_reverse", "int8_t", weight_reverse.ravel()))
    header.add(HeaderArray("net_l1_weight_reverse_pad", "int8_t",
                           weight_reverse_pad.ravel()))

    # layer 2
    input_scale = convert.ste_quant(net, "quant2")
    weight, weight_scale = convert.inq_conv2d(net, "conv2", store_reversed=True)
    bn_scale, bn_offset = convert.batch_norm(net, "batch_norm2")
    output_scale = convert.ste_quant(net, "quant3")
    factor, offset = convert.div_factor_batch_norm(input_scale, weight_scale,
                                                   output_scale, bn_scale, bn_offset,
                                                   pool=8)
    weight = weight.reshape(net_params["F2"], net_params["C"])
    weight = align_array(weight)

    header.add(HeaderComment("Layer 2\n"
                             "=======\n"
                             "Convolution + BN + ReLU + Pooling\n\n"
                             "Input: [F1, C, T]\n"
                             "Weight: [F2, C] (aligned to [F2, 24])\n"
                             "Output: [F2, T // 8]",
                             mode="/*"))
    header.add(HeaderArray("net_l2_factor", "int32_t", factor.ravel()))
    header.add(HeaderArray("net_l2_offset", "int32_t", offset.ravel()))
    header.add(HeaderConstant("NET_L2_WEIGHT_LEN", weight.shape[-1]))
    header.add(HeaderArray("net_l2_weight", "int8_t", weight.ravel()))
    header.add(HeaderArray("net_l2_weight_32", "int32_t", weight.ravel()))

    # layer 3
    input_scale = convert.ste_quant(net, "quant3")
    weight, weight_scale = convert.inq_conv2d(net, "sep_conv1")
    output_scale = convert.ste_quant(net, "quant4")
    factor = convert.div_factor(input_scale, weight_scale, output_scale)
    weight = weight.reshape(net_params["F2"], 16)

    header.add(HeaderComment("Layer 3\n"
                             "=======\n"
                             "Convolution\n\n"
                             "Input: [F2, T // 8]\n"
                             "Weight: [F2, 16]\n"
                             "Output: [F2, T // 8]",
                             mode="/*", blank_line=False))
    header.add(HeaderConstant("NET_L3_PAD_START", 7))
    header.add(HeaderConstant("NET_L3_PAD_END", 8))
    header.add(HeaderConstant("NET_L3_PAD_INPUT_LEN", net_params["T"] // 8 + 7 + 8))
    header.add(HeaderConstant("NET_L3_PAD_INPUT_LEN_ALIGN",
                              align_array_size(net_params["T"] // 8 + 7 + 8)))
    header.add(HeaderConstant("NET_L3_FACTOR", factor))
    header.add(HeaderConstant("NET_L3_WEIGHT_LEN", weight.shape[-1]))
    header.add(HeaderArray("net_l3_weight", "int8_t", weight.ravel()))

    # layer 4
    input_scale = convert.ste_quant(net, "quant4")
    weight, weight_scale = convert.inq_conv2d(net, "sep_conv2")
    output_scale = convert.ste_quant(net, "quant5")
    bn_scale, bn_offset = convert.batch_norm(net, "batch_norm3")
    factor, offset = convert.div_factor_batch_norm(input_scale, weight_scale,
                                                   output_scale, bn_scale, bn_offset,
                                                   pool=8)
    weight = weight.reshape(net_params["F2"], net_params["F2"])

    header.add(HeaderComment("Layer 4\n"
                             "=======\n"
                             "Convolution + BN + ReLU + Pooling\n\n"
                             "Input: [F2, T // 8]\n"
                             "Weight: [F2, F2]\n"
                             "Output: [F2, T // 64]",
                             mode="/*"))
    header.add(HeaderArray("net_l4_factor", "int32_t", factor.ravel()))
    header.add(HeaderArray("net_l4_offset", "int32_t", offset.ravel()))
    header.add(HeaderConstant("NET_L4_WEIGHT_LEN", weight.shape[-1]))
    header.add(HeaderArray("net_l4_weight", "int8_t", weight.ravel()))

    # layer 5
    input_scale = convert.ste_quant(net, "quant5")
    output_scale = convert.ste_quant(net, "quant6")
    weight, bias, weight_scale = convert.inq_linear(net, "fc")
    weight = weight.reshape(net_params["N"],
                            net_params["F2"] * (net_params["T"] // 64))
    # weight = align_array(weight)
    # we want to align each T // 64 block individually, not the full product F2 * (T // 64)
    t64 = net_params["T"] // 64
    t64_align = align_array_size(t64)
    weight_align = np.zeros((net_params["N"], net_params["F2"] * t64_align), dtype=int)
    for i in range(net_params["F2"]):
        weight_align[:, i * t64_align:i * t64_align + t64] = \
            weight[:, i * t64:(i + 1) * t64]
    factor = convert.div_factor(input_scale, weight_scale, output_scale)

    header.add(HeaderComment("Layer 5\n"
                             "=======\n"
                             "Linear Layer (without scaling in the end)\n\n"
                             "Input: [F2, T // 64]\n"
                             "Weight: [N, F2 * (T // 64)]\n"
                             "Bias: [N]\n"
                             "Output: [N]",
                             mode="/*"))
    header.add(HeaderConstant("NET_L5_FACTOR", factor))
    header.add(HeaderArray("net_l5_bias", "int8_t", bias.ravel()))
    header.add(HeaderConstant("NET_L5_WEIGHT_LEN", weight_align.shape[-1]))
    header.add(HeaderArray("net_l5_weight", "int8_t", weight_align.ravel()))

    # store the header file
    header.write()

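# Possible command-line entry point (a sketch; the argument handling and file
# names below are illustrative assumptions, not the repository's actual CLI):
if __name__ == "__main__":
    import sys
    # expected (assumed) arguments: exported net (.npz), training config (.json),
    # output header path
    _net_file, _config_file, _output_file = sys.argv[1:4]
    gen_net_header(_net_file, _config_file, _output_file)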