def make_default_quantizer(self, mode) -> quantizer_impl.IQuantizer:
    """make quantizer given qkeras quantizer type."""
    if mode == "fp32":
        return quantizer_impl.FloatingPoint(bits=32)
    elif mode == "fp16":
        return quantizer_impl.FloatingPoint(bits=16)
    elif mode == "int8":
        qbits = quantizer_impl.QuantizedBits()
        qbits.convert_qkeras_quantizer(quantizers.quantized_bits(8, 0, 1))
        return qbits
    elif mode == "int16":
        qbits = quantizer_impl.QuantizedBits()
        qbits.convert_qkeras_quantizer(quantizers.quantized_bits(16, 7, 1))
        return qbits
    elif mode == "int32":
        qbits = quantizer_impl.QuantizedBits()
        qbits.convert_qkeras_quantizer(quantizers.quantized_bits(32, 10, 1))
        return qbits
    else:
        try:
            # string to quantizer object
            q_name = "quantizers." + mode
            qkeras_object = eval(q_name)  # pylint: disable=eval-used
            return self._make_quantizer_util(qkeras_object)
        except:  # pylint: disable=bare-except
            raise ValueError("unaccepted quantizer {}!".format(mode))
def qconv_model():
    x = x_in = keras.layers.Input((23, 23, 1), name="input")
    x = QActivation("quantized_relu(4)", name="QA_0")(x)
    x = QConv2D(
        16, 2, 2,
        kernel_quantizer=quantizers.binary(),
        bias_quantizer=quantizers.ternary(),
        name="qconv2d_1")(x)
    x = QConv2D(
        8, 2, 2,
        kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
        bias_quantizer=quantizers.quantized_bits(4, 0, 1),
        activation=quantizers.quantized_relu(6, 2),
        name="qconv2d_2")(x)
    x = QConv2D(
        2, 2, 2,
        kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
        bias_quantizer=quantizers.quantized_bits(4, 0, 1),
        activation=quantizers.quantized_relu(6, 2),
        name="qconv2d_3")(x)
    x = QActivation("quantized_bits(6, 0, 1)", name="QA_4")(x)
    model = keras.Model(inputs=[x_in], outputs=[x])
    return model
def multiply_qmodel():
    # element-wise multiply of a list of inputs.
    # It takes as input a list of tensors, all of the same shape,
    # and returns a single tensor (also of the same shape).
    x1 = input1 = keras.layers.Input((16,), name="input_0")
    x1 = QDense(
        8,
        kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
        bias_quantizer=quantizers.quantized_bits(5, 0, 1),
        activation=quantizers.quantized_bits(4, 0, 1),
        name="dense_0")(x1)
    x2 = input2 = keras.layers.Input(shape=(32,), name="input_1")
    x2 = QDense(
        8,
        kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
        bias_quantizer=quantizers.quantized_bits(5, 0, 1),
        activation=quantizers.quantized_bits(5, 0, 1),
        name="dense_1")(x2)
    x3 = input3 = keras.layers.Input(shape=(64,), name="input_2")
    x3 = QDense(
        8,
        kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
        bias_quantizer=quantizers.quantized_bits(5, 0, 1),
        activation=quantizers.quantized_bits(6, 0, 1),
        name="dense_2")(x3)
    x = keras.layers.multiply([x1, x2, x3], name="multiply")
    model = keras.Model(inputs=[input1, input2, input3], outputs=[x])
    return model
def test_big_bias_quantizer():
    q1 = quantizer_impl.QuantizedBits()
    q1.convert_qkeras_quantizer(quantizers.quantized_bits(8, 3))
    q2 = quantizer_impl.QuantizedBits()
    q2.convert_qkeras_quantizer(quantizers.quantized_bits(16, 4))
    r = adder_impl.FixedPointAdder(q1, q2)

    # int_bits = max(q1.int_bits, q2.int_bits) + 1
    # bits = int_bits + sign_bit + max(q1_fraction_bits, q2_fraction_bits)
    assert r.output.bits == 17
    assert r.output.int_bits == 5
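The expected widths follow from the formula in the comments. A minimal sketch of that arithmetic, assuming quantized_bits(bits, integer) carries one sign bit so that fraction_bits = bits - integer - 1:

# Hedged sketch of the adder bit-width arithmetic described above.
q1_int, q1_frac = 3, 8 - 3 - 1   # quantized_bits(8, 3):  fraction_bits = 4
q2_int, q2_frac = 4, 16 - 4 - 1  # quantized_bits(16, 4): fraction_bits = 11

int_bits = max(q1_int, q2_int) + 1           # 5
bits = int_bits + 1 + max(q1_frac, q2_frac)  # 5 + sign + 11 = 17
assert (bits, int_bits) == (17, 5)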
def po2_qbits_model():
    x = x_in = keras.layers.Input((23, 23, 1), name="input")
    x = QActivation("quantized_relu_po2(3, 2)", name="QA_0")(x)
    x = QConv2D(
        16, 2, 2,
        kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
        bias_quantizer=quantizers.quantized_bits(4, 0, 1),
        name="qconv2d_1")(x)
    model = keras.Model(inputs=[x_in], outputs=[x])
    return model
def test_qbn_inference():
    input_quantizers = [quantizers.quantized_bits(4, 0, 1)]
    (hw_weight_dict, model) = qbn_model_inference()
    dtype_dict = run(model, input_quantizers, is_inference=True,
                     hw_weight_dict=hw_weight_dict)

    multiplier = dtype_dict["qconv2d_1"]["multiplier"]
    accumulator = dtype_dict["qconv2d_1"]["accumulator"]
    output = dtype_dict["qconv2d_1"]["output_quantizer"]
    fused_accumulator = dtype_dict["qconv2d_1"]["fused_accumulator"]

    assert multiplier["quantizer_type"] == "quantized_bits"
    assert multiplier["bits"] == 7
    assert multiplier["int_bits"] == 1
    assert multiplier["is_signed"] == 1
    assert multiplier["op_type"] == "mul"

    assert accumulator["quantizer_type"] == "quantized_bits"
    assert accumulator["bits"] == 9
    assert accumulator["int_bits"] == 3
    assert accumulator["is_signed"] == 1
    assert accumulator["op_type"] == "add"

    assert fused_accumulator["quantizer_type"] == "quantized_bits"
    assert fused_accumulator["bits"] == 25
    assert fused_accumulator["int_bits"] == 4
    assert fused_accumulator["is_signed"] == 1
    assert fused_accumulator["op_type"] == "add"
def test_qnoise_quantized_bits():
    # 1 sign bit, 1 integer bit, and 2 fractional bits.
    bits = 4
    integer = 1
    symmetric = True
    keep_negative = True
    alpha = 1
    use_stochastic_rounding = False
    qb = quantized_bits(
        bits=bits,
        integer=integer,
        symmetric=symmetric,
        keep_negative=keep_negative,
        alpha=alpha,
        use_stochastic_rounding=use_stochastic_rounding)

    inputs = np.array([0.0, 0.5, -0.5, 0.6, -0.6, 2.0, -2.0], dtype=np.float32)
    x = np.array([0.0, 0.5, -0.5, 0.6, -0.6, 2.0, -2.0], dtype=np.float32)
    xq = np.array([0.0, 0.5, -0.5, 0.5, -0.5, 1.75, -1.75], dtype=np.float32)
    x_xq = 0.5 * (x + xq)

    # no quantization
    x_q_0 = qb(inputs, qnoise_factor=0.0)
    assert_equal(x_q_0, x)

    # full quantization
    x_q_1 = qb(inputs, qnoise_factor=1.0)
    assert_equal(x_q_1, xq)

    # mixing half and half of x and xq
    x_q_05 = qb(inputs, qnoise_factor=0.5)
    assert_equal(x_q_05, x_xq)
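The three assertions are consistent with a linear blend between the float input x and its quantized value xq, output = x + qnoise_factor * (xq - x). A minimal numpy sketch of that interpretation (a reading of the test, not the quantizer's internals):

import numpy as np

# Hedged sketch: the blend reduces to x at qnoise_factor=0, xq at 1,
# and 0.5 * (x + xq) at 0.5, matching the three assertions above.
x = np.array([0.0, 0.5, -0.5, 0.6, -0.6, 2.0, -2.0], dtype=np.float32)
xq = np.array([0.0, 0.5, -0.5, 0.5, -0.5, 1.75, -1.75], dtype=np.float32)

def blend(qnoise_factor):
    return x + qnoise_factor * (xq - x)

assert np.allclose(blend(0.0), x)
assert np.allclose(blend(1.0), xq)
assert np.allclose(blend(0.5), 0.5 * (x + xq))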
def test_qbn_inference():
    input_quantizers = [quantizers.quantized_bits(4, 0, 1)]
    model = qbn_model_inference()
    dtype_dict = run(model, input_quantizers, is_inference=True)

    multiplier = dtype_dict["qconv2d_3"]["multiplier"]
    accumulator = dtype_dict["qconv2d_3"]["accumulator"]
    output = dtype_dict["qconv2d_3"]["output_quantizer"]

    assert multiplier["quantizer_type"] == "quantized_bits"
    assert multiplier["bits"] == 15
    assert multiplier["int_bits"] == 7
    assert multiplier["is_signed"] == 1
    assert multiplier["op_type"] == "shifter"

    assert accumulator["quantizer_type"] == "quantized_bits"
    assert accumulator["bits"] == 18
    assert accumulator["int_bits"] == 10
    assert accumulator["is_signed"] == 1
    assert accumulator["op_type"] == "add"

    assert output["quantizer_type"] == "quantized_bits"
    assert output["bits"] == 18
    assert output["int_bits"] == 10
    assert output["is_signed"] == 1
def qbn_model_inference():
    x = x_in = keras.layers.Input((23, 23, 1), name="input")
    x = QConv2D(
        4, 2, 23,
        kernel_quantizer=quantizers.quantized_bits(4, 0, 1, alpha=1.0),
        bias_quantizer=quantizers.quantized_bits(4, 0, 1, alpha=1.0),
        use_bias=False,
        name="qconv2d_1")(x)
    x = QBatchNormalization(
        mean_quantizer=quantizers.quantized_bits(6, 0, 1),
        gamma_quantizer=None,
        variance_quantizer=None,
        beta_quantizer=quantizers.quantized_bits(6, 0, 1),
        inverse_quantizer=quantizers.quantized_bits(16, 0, 1),
        scale=False,
        center=False,
        gamma_range=8,
        beta_range=4,
        name="qbn_2")(x)
    x = QConv2D(
        2, 1, 1,
        kernel_quantizer=quantizers.quantized_bits(3, 0),
        bias_quantizer=quantizers.quantized_bits(3, 2),
        name="qconv2d_3")(x)
    model = keras.Model(inputs=[x_in], outputs=[x])
    hw_weight_dict = model_save_quantized_weights(model)
    return (hw_weight_dict, model)
def hybrid_model():
    """hybrid model that mixes qkeras and keras layers."""
    x = x_in = keras.layers.Input((784,), name="input")
    x = keras.layers.Dense(300, name="d0")(x)
    x = keras.layers.Activation("relu", name="d0_act")(x)
    x = QDense(
        100,
        kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
        bias_quantizer=quantizers.quantized_bits(4, 0, 1),
        name="d1")(x)
    x = QActivation("quantized_relu(4,0)", name="d1_qr4")(x)
    x = QDense(
        10,
        kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
        bias_quantizer=quantizers.quantized_bits(4, 0, 1),
        name="d2")(x)
    x = keras.layers.Activation("softmax", name="softmax")(x)
    return keras.Model(inputs=[x_in], outputs=[x])
def test_single_dense_activation_exact(randX_100_16, bits, alpha):
    '''
    Test a single Dense -> Activation layer topology for bit exactness
    with number of bits parameter.
    '''
    X = randX_100_16
    model = Sequential()
    model.add(
        QDense(16,
               input_shape=(16,),
               name='fc1',
               kernel_quantizer=quantized_bits(bits, 0, alpha=alpha),
               bias_quantizer=quantized_bits(bits, 0, alpha=1),
               kernel_initializer='lecun_uniform'))
    model.add(QActivation(activation=quantized_relu(bits, 0), name='relu1'))
    model.compile()

    hls4ml.model.optimizer.get_optimizer(
        'output_rounding_saturation_mode').configure(
            layers=['relu1'],
            rounding_mode='AP_RND_CONV',
            saturation_mode='AP_SAT')
    config = hls4ml.utils.config_from_keras_model(model, granularity='name')
    hls_model = hls4ml.converters.convert_from_keras_model(
        model,
        hls_config=config,
        output_dir=str(
            test_root_path /
            'hls4mlprj_qkeras_single_dense_activation_exact_{}_{}'.format(
                bits, alpha)),
        part='xcu250-figd2104-2L-e')
    hls4ml.model.optimizer.get_optimizer(
        'output_rounding_saturation_mode').configure(layers=[])
    hls_model.compile()

    y_qkeras = model.predict(X)
    y_hls4ml = hls_model.predict(X)
    # Goal is to get it passing with all equal
    # np.testing.assert_array_equal(y_qkeras, y_hls4ml)
    # For now allow matching within 1 bit
    np.testing.assert_allclose(y_qkeras.ravel(), y_hls4ml.ravel(),
                               atol=2**-bits, rtol=1.0)
def gen_model(img_shape):
    img_input = x = keras.Input(shape=img_shape)
    x = QConv2D(
        filters=5, kernel_size=4, strides=4,
        kernel_quantizer=quantizers.quantized_bits(8, 3, alpha="auto_po2"),
        bias_quantizer=quantizers.quantized_bits(8, 3),
        name="conv")(x)
    x = QActivation(activation=quantizers.quantized_relu(4, 0), name="act")(x)
    x = keras.layers.Flatten(name="flatten")(x)
    x = QDense(
        5,
        kernel_quantizer=quantizers.quantized_bits(8, 0, alpha="auto_po2"),
        bias_quantizer=quantizers.quantized_bits(8, 3),
        name="dense")(x)
    model = keras.Model(inputs=img_input, outputs=[x])
    return model
def qdense_model(Inputs, l1Reg=0, bits=6, ints=0, h5fName=None):
    x = QDense(
        21,
        activation=None,
        kernel_initializer='lecun_uniform',
        kernel_regularizer=l1(l1Reg),
        bias_regularizer=l1(l1Reg),
        kernel_quantizer=quantized_bits(bits, ints, alpha=1),
        bias_quantizer=quantized_bits(6, 0, alpha=1),
        name="Dense_Layer_1")(Inputs)
    x = QActivation(activation=quantized_relu(bits, ints),
                    name="Relu_Layer_1")(x)
    x = QDense(
        22,
        activation=None,
        kernel_initializer='lecun_uniform',
        kernel_regularizer=l1(l1Reg),
        bias_regularizer=l1(l1Reg),
        kernel_quantizer=quantized_bits(bits, ints, alpha=1),
        bias_quantizer=quantized_bits(bits, ints, alpha=1),
        name="Dense_Layer_2")(x)
    x = QActivation(activation=quantized_relu(bits, ints),
                    name="Relu_Layer_2")(x)
    x = QDense(
        8,
        activation=None,
        kernel_initializer='lecun_uniform',
        kernel_regularizer=l1(l1Reg),
        bias_regularizer=l1(l1Reg),
        kernel_quantizer=quantized_bits(bits, ints, alpha=1),
        bias_quantizer=quantized_bits(bits, ints, alpha=1),
        name="Dense_Layer_3")(x)
    x = QActivation(activation=quantized_relu(bits), name="Relu_Layer_3")(x)
    x = QDense(
        1,
        activation=None,
        kernel_initializer='lecun_uniform',
        kernel_regularizer=l1(l1Reg),
        bias_regularizer=l1(l1Reg),
        kernel_quantizer=quantized_bits(bits, ints, alpha=1),
        bias_quantizer=quantized_bits(bits, ints, alpha=1),
        name="Dense_Layer_4")(x)
    # x = QActivation("quantized_bits(20,5)", name="Final_quantization")(x)
    predictions = Activation(activation='sigmoid',
                             name="Sigmoid_Output_Layer")(x)
    model = Model(inputs=Inputs, outputs=predictions)
    return model
def qbn_model_inference():
    x = x_in = keras.layers.Input((23, 23, 1), name="input")
    x = QConv2D(
        4, 2, 23,
        kernel_quantizer=quantizers.quantized_ulaw(4, 1, 1),
        bias_quantizer=quantizers.stochastic_ternary(),
        use_bias=False,
        name="qconv2d_1")(x)
    x = QBatchNormalization(
        gamma_quantizer=quantizers.quantized_relu_po2(3, 2),
        variance_quantizer=quantizers.quantized_po2(
            3, 2, quadratic_approximation=False),
        beta_quantizer=quantizers.quantized_bits(6, 0, 1),
        scale=False,
        center=False,
        gamma_range=8,
        beta_range=4,
        name="qbn_2")(x)
    x = QConv2D(
        2, 1, 1,
        kernel_quantizer=quantizers.quantized_po2(3, 0),
        bias_quantizer=quantizers.quantized_po2(3, 2),
        name="qconv2d_3")(x)
    model = keras.Model(inputs=[x_in], outputs=[x])

    layer = model.get_layer("qbn_2")
    weight_arr = [
        np.array([3, 4, 1, 7]),
        np.array([6, 4, 1, -7]),
        np.array([2, 7, -8, 2]),
        np.array([-1, -7, 4, 9])
    ]
    # quantize the weights
    quantizer_list = layer.get_quantizers()
    for (i, quantizer) in enumerate(quantizer_list):
        if quantizer is not None:
            weight_arr[i] = keras.backend.eval(
                quantizer(keras.backend.constant(weight_arr[i])))

    num_weights = 4
    if not layer.scale:
        num_weights -= 1
    if not layer.center:
        num_weights -= 1
    layer.set_weights(weight_arr[:num_weights])

    return model
def float_po2_model():
    x = x_in = keras.layers.Input((23, 23, 1), name="input")
    x = QConv2D(
        16, 2, 2,
        kernel_quantizer=quantizers.quantized_po2(5, 0),
        bias_quantizer=quantizers.quantized_po2(5, 0),
        name="qconv2d_1")(x)
    x = QActivation("quantized_relu_po2(3, 2)", name="QA_0")(x)
    x = QConv2D(
        10, 2, 2,
        kernel_quantizer=quantizers.quantized_bits(5, 2, 1),
        bias_quantizer=quantizers.quantized_bits(5, 2, 1),
        name="qconv2d_0")(x)
    model = keras.Model(inputs=[x_in], outputs=[x])

    for layer in model.layers:
        print(layer)
        print(layer.output_shape)
    return model
def maximum_qmodel(quantizer1, quantizer2, quantizer3):
    # element-wise maximum/minimum/average of a list of inputs.
    # It takes as input a list of tensors, all of the same shape,
    # and returns a single tensor (also of the same shape).
    x1 = input1 = keras.layers.Input((16,), name="input_0")
    x1 = QDense(
        8,
        kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
        bias_quantizer=quantizers.quantized_bits(5, 0, 1),
        activation=quantizer1,
        name="qdense_0")(x1)
    x2 = input2 = keras.layers.Input(shape=(32,), name="input_1")
    x2 = QDense(
        8,
        kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
        bias_quantizer=quantizers.quantized_bits(5, 0, 1),
        activation=quantizer2,
        name="dense_1")(x2)
    x3 = input3 = keras.layers.Input(shape=(64,), name="input_2")
    x3 = QDense(
        8,
        kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
        bias_quantizer=quantizers.quantized_bits(5, 0, 1),
        activation=quantizer3,
        name="dense_2")(x3)
    x = keras.layers.maximum([x1, x2, x3], name="maximum")
    model = keras.Model(inputs=[input1, input2, input3], outputs=[x])
    return model
def test_QuantizedBits():
    qkeras_quantizer = quantizers.quantized_bits()
    qtools_quantizer = quantizer_impl.QuantizedBits()
    qtools_quantizer.convert_qkeras_quantizer(qkeras_quantizer)
    new_quantizer = qtools_quantizer.convert_to_qkeras_quantizer(
        symmetric=qkeras_quantizer.symmetric,
        alpha=qkeras_quantizer.alpha,
        use_stochastic_rounding=qkeras_quantizer.use_stochastic_rounding,
        scale_axis=qkeras_quantizer.scale_axis,
        qnoise_factor=qkeras_quantizer.qnoise_factor)

    result = new_quantizer.__dict__
    for (key, val) in result.items():
        assert_equal(val, qkeras_quantizer.__dict__[key])
def test_auto_po2():

    def gen_model(img_shape):
        img_input = x = keras.Input(shape=img_shape)
        x = QConv2D(
            filters=5, kernel_size=4, strides=4,
            kernel_quantizer=quantizers.quantized_bits(8, 3, alpha="auto_po2"),
            bias_quantizer=quantizers.quantized_bits(8, 3),
            name="conv")(x)
        x = QActivation(activation=quantizers.quantized_relu(4, 0),
                        name="act")(x)
        x = keras.layers.Flatten(name="flatten")(x)
        x = QDense(
            5,
            kernel_quantizer=quantizers.quantized_bits(8, 0, alpha="auto_po2"),
            bias_quantizer=quantizers.quantized_bits(8, 3),
            name="dense")(x)
        model = keras.Model(inputs=img_input, outputs=[x])
        return model

    model = gen_model((32, 32, 3))
    model.compile(loss="mse", run_eagerly=True)
    model.layers[1].quantizers[0].scale = tf.constant(
        [[[[0.0625, 0.0625, 0.0625, 0.0625, 0.03125]]]])
    model.layers[4].quantizers[0].scale = tf.constant(
        [[0.5, 0.5, 1, 0.5, 0.25]])

    input_quantizers = [
        quantizers.quantized_bits(bits=8, integer=0, keep_negative=False)
    ]
    dtype_dict = run(model, input_quantizers)

    multiplier = dtype_dict["conv"]["multiplier"]
    assert multiplier["quantizer_type"] == "quantized_bits"
    # The original multiplier has 16 bits (16 = 8 + 8) and 3 int_bits.
    # The modified multiplier has bits = max_fractional_bits + max_int_bits
    #                                  = bits + max_shift - min_shift.
    # max_shift = log2(0.0625) = -4, min_shift = log2(0.03125) = -5,
    # therefore the modified multiplier has 17 bits.
    assert multiplier["bits"] == 17
    # Modified multiplier int_bits = int_bits + max_shift = 3 - 4 = -1.
    # Because the datatype map adds 1 extra sign bit to int_bits, we expect
    # to see multiplier["int_bits"] == 0.
    assert multiplier["int_bits"] == 0

    multiplier = dtype_dict["dense"]["multiplier"]
    assert multiplier["quantizer_type"] == "quantized_bits"
    assert multiplier["bits"] == 14
    assert multiplier["int_bits"] == 1
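A minimal sketch of the shift arithmetic the comments above describe, assuming the auto_po2 scales fold into the multiplier as power-of-two shifts. The 12-bit/0-int baseline used for the dense multiplier (8-bit kernel times the 4-bit quantized_relu input) is an inference for illustration, not stated in the test:

import math

def adjusted_multiplier_bits(base_bits, base_int_bits, scales):
    # Fold per-channel power-of-two scales into the multiplier width.
    shifts = [math.log2(s) for s in scales]
    max_shift, min_shift = max(shifts), min(shifts)
    bits = base_bits + int(max_shift - min_shift)
    int_bits = base_int_bits + int(max_shift)
    return bits, int_bits + 1  # +1 sign bit, as in the datatype map

# conv: 8-bit kernel x 8-bit input -> 16 bits, 3 int_bits
assert adjusted_multiplier_bits(
    16, 3, [0.0625, 0.0625, 0.0625, 0.0625, 0.03125]) == (17, 0)
# dense: assumed 12-bit / 0-int baseline
assert adjusted_multiplier_bits(12, 0, [0.5, 0.5, 1, 0.5, 0.25]) == (14, 1)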
def test_pooling():
    input_quantizers = [quantizers.quantized_bits(8, 0, 1)]
    model = pooling_qmodel()
    dtype_dict = run(model, input_quantizers)

    accumulator = dtype_dict["avg_pooling"]["pool_sum_accumulator"]
    assert accumulator["quantizer_type"] == "quantized_bits"
    assert accumulator["bits"] == 10
    assert accumulator["int_bits"] == 3

    accumulator = dtype_dict["global_avg_pooling"]["pool_sum_accumulator"]
    assert accumulator["quantizer_type"] == "quantized_bits"
    assert accumulator["bits"] == 16
    assert accumulator["int_bits"] == 9
def qdense_model_fork():
    x = x_in = keras.layers.Input((23,), name="input")
    x = QDense(
        10,
        kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
        bias_quantizer=quantizers.quantized_bits(5, 0, 1),
        activation=quantizers.quantized_po2(3, 1),
        name="qdense_0")(x)
    x = QDense(
        20,
        kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
        bias_quantizer=quantizers.quantized_bits(5, 0, 1),
        activation=quantizers.quantized_relu(6, 2),
        name="qdense_1")(x)
    x = QActivation("quantized_relu(4)", name="QA_2")(x)
    x_1 = QDense(
        30,
        kernel_quantizer=quantizers.binary(),
        bias_quantizer=quantizers.binary(),
        name="qdense_3")(x)
    x_2 = QActivation("quantized_relu(6,2)", name="QA_3")(x)
    model = keras.Model(inputs=[x_in], outputs=[x_1, x_2])
    return model
def convert_to_qkeras_quantizer(self,
                                symmetric=1,
                                alpha=None,
                                use_stochastic_rounding=False,
                                scale_axis=None,
                                qnoise_factor=1.0):
    """convert qtools quantizer to qkeras quantizer."""
    return quantizers.quantized_bits(
        bits=self.bits,
        integer=self.int_bits,
        keep_negative=self.is_signed,
        symmetric=symmetric,
        alpha=alpha,
        use_stochastic_rounding=use_stochastic_rounding,
        scale_axis=scale_axis,
        qnoise_factor=qnoise_factor)
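The round trip through convert_qkeras_quantizer and convert_to_qkeras_quantizer is exercised by test_QuantizedBits above; a minimal usage sketch along the same lines:

# Hedged sketch of the round trip: qkeras quantizer -> qtools QuantizedBits
# -> qkeras quantizer, whose configuration should match field by field.
qk = quantizers.quantized_bits(8, 0, 1)
qt = quantizer_impl.QuantizedBits()
qt.convert_qkeras_quantizer(qk)
qk_roundtrip = qt.convert_to_qkeras_quantizer(
    symmetric=qk.symmetric,
    alpha=qk.alpha,
    use_stochastic_rounding=qk.use_stochastic_rounding,
    scale_axis=qk.scale_axis,
    qnoise_factor=qk.qnoise_factor)
assert qk_roundtrip.bits == qk.bits
assert qk_roundtrip.integer == qk.integer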
def test_qdense_model_fork():
    input_quantizers = [quantizers.quantized_bits(4, 0, 1)]
    model = qdense_model_fork()
    dtype_dict = run(model, input_quantizers)

    multiplier = dtype_dict["qdense_3"]["multiplier"]
    assert multiplier["quantizer_type"] == "quantized_bits"
    assert multiplier["bits"] == 5
    assert multiplier["int_bits"] == 1
    assert multiplier["is_signed"] == 1
    assert multiplier["op_type"] == "mux"

    accumulator = dtype_dict["qdense_3"]["accumulator"]
    assert accumulator["quantizer_type"] == "quantized_bits"
    assert accumulator["bits"] == 11
    assert accumulator["int_bits"] == 7
    assert accumulator["is_signed"] == 1
    assert accumulator["op_type"] == "add"
def test_wrong_input_quantizers():
    input_quantizers = [
        quantizers.quantized_bits(4, 0, 1),
        quantizers.quantized_bits(5, 0, 1),
        quantizers.quantized_bits(6, 0, 1)
    ]
    # INPUT_QUANTIZERS = None
    x1 = input1 = keras.layers.Input((16,), name="input_0")
    x1 = QDense(
        8,
        kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
        bias_quantizer=quantizers.quantized_bits(5, 0, 1),
        name="dense_0")(x1)
    x2 = input2 = keras.layers.Input(shape=(32,), name="input_1")
    x2 = QDense(
        8,
        kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
        bias_quantizer=quantizers.quantized_bits(5, 0, 1),
        name="dense_1")(x2)
    x = keras.layers.add([x1, x2], name="add")
    model = keras.Model(inputs=[input1, input2], outputs=[x])

    with pytest.raises(qgraph.WrongInputQuantizerError):
        run(model, input_quantizers)
def get_model(quantize=False):
    x1 = input1 = keras.layers.Input((16, 16, 3), name="input_0")
    if quantize:
        x1 = QConv2D(
            16, 2, 2,
            kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
            bias_quantizer=quantizers.quantized_bits(5, 0, 1),
            name="conv_0")(x1)
    else:
        x1 = keras.layers.Conv2D(16, 2, 2, name="conv_0")(x1)

    x2 = input2 = keras.layers.Input(shape=(16, 16, 3), name="input_1")
    if quantize:
        x2 = QConv2D(
            16, 2, 2,
            kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
            bias_quantizer=quantizers.quantized_bits(5, 0, 1),
            name="conv_1")(x2)
    else:
        x2 = keras.layers.Conv2D(16, 2, 2, name="conv_1")(x2)

    x = keras.layers.add([x1, x2], name="add")
    if quantize:
        x = QActivation(activation="quantized_relu(8, 2)", name="relu")(x)
    else:
        x = keras.layers.Activation("relu", name="relu")(x)

    if quantize:
        x = QConv2D(
            2, 2, 2,
            kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
            bias_quantizer=quantizers.quantized_bits(5, 0, 1),
            name="conv_2")(x)
    else:
        x = keras.layers.Conv2D(2, 2, 2, name="conv_2")(x)

    model = keras.Model(inputs=[input1, input2], outputs=[x])
    return model
reference_internal = "int8"
reference_accumulator = "int32"

# By setting for_reference=True, we create a QTools object which uses
# keras_quantizer to quantize weights/bias and keras_accumulator to quantize
# MAC variables for all layers. This overrides any quantizers that the user
# specified in the QKeras layers. The purpose of doing so is to let the user
# calculate a baseline energy number for a given model architecture and
# compare it against quantized models.
q = run_qtools.QTools(
    model,
    # energy calculation using a given process
    process="horowitz",
    # quantizers for model input
    source_quantizers=[quantizers.quantized_bits(8, 0, 1)],
    is_inference=False,
    # absolute path (including filename) of the model weights
    weights_path=None,
    # keras_quantizer to quantize weight/bias in un-quantized keras layers
    keras_quantizer=reference_internal,
    # keras_accumulator to quantize MAC in un-quantized keras layers
    keras_accumulator=reference_accumulator,
    # whether to calculate baseline energy
    for_reference=True)

# calculate energy of the derived data type map
ref_energy_dict = q.pe(
    # whether to store parameters in dram, sram, or fixed
    weights_on_memory="sram",
    # store activations in dram or sram
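The remaining pe() arguments and the reduction of the per-layer energy dictionary to a single number follow the pattern used in test_qenergy below. A minimal sketch of that step, assuming qtools_settings is imported as in that test:

# Hedged sketch of completing the energy estimate, mirroring test_qenergy.
ref_energy_dict = q.pe(
    weights_on_memory="sram",
    activations_on_memory="sram",
    min_sram_size=8 * 16 * 1024 * 1024,
    rd_wr_on_io=False)
reference_size = q.extract_energy_sum(
    qtools_settings.cfg.include_energy, ref_energy_dict)
print("baseline energy:", reference_size)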
def Q_baseline_model(size, epochs, optimizer, X_training, y_training,
                     X_validation, y_validation, output_name):
    '''
    NN model constructor with loss and accuracy plots.

    Parameters
    ----------
    size : int
        Batch size used in the training process.
    epochs : int
        Number of epochs the model will be trained.
    optimizer : keras.optimizer
        Optimizer function.
    X_training : numpy array
        Training data set.
    y_training : numpy array
        True labels for the training set.
    X_validation : numpy array
        Validation data set.
    y_validation : numpy array
        True labels for the validation set.
    output_name : str
        Name used for saved plots.

    Returns
    -------
    model : keras Sequential
        QKeras model.
    w : numpy array
        Array of final weights used in the model for later inference.
    '''
    pruning = False

    # create model
    name = "RMSE validation"
    name2 = "RMSE training"
    history = History()

    model = Sequential()
    model.add(
        QDense(60,
               input_shape=(27,),
               kernel_quantizer=quantized_bits(16, 1),
               bias_quantizer=quantized_bits(16, 1),
               kernel_initializer='random_normal'))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu1'))
    model.add(
        QDense(50,
               kernel_quantizer=quantized_bits(16, 1),
               bias_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu2'))
    # model.add(Dropout(rate=0.2))
    model.add(
        QDense(30,
               kernel_quantizer=quantized_bits(16, 1),
               bias_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu3'))
    model.add(
        QDense(40,
               kernel_quantizer=quantized_bits(16, 1),
               bias_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu4'))
    model.add(
        QDense(15,
               kernel_quantizer=quantized_bits(16, 1),
               bias_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu5'))
    # model.add(QDense(80, input_shape=(27,),
    #                  kernel_quantizer=quantized_bits(16, 1),
    #                  bias_quantizer=quantized_bits(16, 1),
    #                  kernel_initializer='random_normal'))
    # model.add(QActivation(activation=quantized_relu(16, 1), name='relu1'))
    # model.add(QDense(50, kernel_quantizer=quantized_bits(16, 1),
    #                  bias_quantizer=quantized_bits(16, 1)))
    # model.add(QActivation(activation=quantized_relu(16, 1), name='relu2'))
    # model.add(QDense(35, kernel_quantizer=quantized_bits(16, 1),
    #                  bias_quantizer=quantized_bits(16, 1)))
    # model.add(QActivation(activation=quantized_relu(16, 1), name='relu3'))
    # model.add(Dropout(rate=0.2))
    model.add(QDense(1, kernel_quantizer=quantized_bits(16, 1)))
    model.add(QActivation(activation=quantized_relu(16, 1), name='relu6'))
    # model.add(Activation("sigmoid"))
    # model.add(QActivation(activation=quantized_tanh(16, 1), name='tanh'))

    if pruning == True:
        print("////////////////////////Training Model with pruning")
        pruning_params = {
            "pruning_schedule":
                pruning_schedule.ConstantSparsity(0.75,
                                                  begin_step=2000,
                                                  frequency=100)
        }
        model = prune.prune_low_magnitude(model, **pruning_params)
        model.compile(loss='mean_squared_error', optimizer=optimizer)
        model.fit(X_training, y_training,
                  batch_size=size,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(X_validation, y_validation),
                  callbacks=[history, pruning_callbacks.UpdatePruningStep()])
        model = strip_pruning(model)

        w = model.layers[0].weights[0].numpy()
        h, b = np.histogram(w, bins=100)
        plt.figure(figsize=(7, 7))
        plt.bar(b[:-1], h, width=b[1] - b[0])
        plt.semilogy()
        plt.savefig("Zeros' distribution", format='png')
        print('% of zeros = {}'.format(np.sum(w == 0) / np.size(w)))
    else:
        print("////////////////////////Training Model WITHOUT pruning")
        model.compile(loss='mean_squared_error', optimizer=optimizer)
        model.fit(X_training, y_training,
                  batch_size=size,
                  epochs=epochs,
                  verbose=1,
                  validation_data=(X_validation, y_validation),
                  callbacks=[history])

    # Compile model
    # model.compile(loss='mean_squared_error', optimizer=optimizer)
    # model.fit(X_training, y_training,
    #           batch_size=size,
    #           epochs=epochs,
    #           verbose=1,
    #           validation_data=(X_validation, y_validation),
    #           callbacks=[history])

    w = []
    for layer in model.layers:
        print(layer)
        w.append(layer.get_weights())
    # print(w)

    train_predictions = model.predict(X_training)
    predictions = model.predict(X_validation)
    lin_mse = mean_squared_error(y_validation, predictions)
    lin_rmse = np.sqrt(lin_mse)
    lin_mse2 = mean_squared_error(y_training, train_predictions)
    lin_rmse2 = np.sqrt(lin_mse2)
    msg = "%s: %f" % (name, lin_rmse)
    msg2 = "%s: %f" % (name2, lin_rmse2)
    print(msg)
    print(msg2)

    fig, ax = plt.subplots()
    # xy = np.vstack([y_validation, predictions])
    # z = gaussian_kde(xy)
    ax.scatter(y_validation, predictions, edgecolors=(0, 0, 0))
    ax.set_title('Regression model predictions (validation set)')
    ax.set_xlabel('Measured $p_T$ (GeV/c)')
    ax.set_ylabel('Predicted $p_T$ (GeV/c)')
    ax.plot([Y.min(), Y.max()], [Y.min(), Y.max()], 'k--', lw=4)
    plt.rc('font', size=20)
    plt.rc('axes', titlesize=18)
    plt.rc('axes', labelsize=18)
    plt.rc('xtick', labelsize=18)
    plt.rc('ytick', labelsize=18)
    plt.rc('legend', fontsize=18)
    plt.rc('figure', titlesize=18)
    plt.tight_layout()
    plt.savefig(outrootname + '/' + '1' + output_name, format='png', dpi=800)

    fig2, ax2 = plt.subplots()
    ax2.plot(history.history['loss'], label='loss')
    ax2.plot(history.history['val_loss'], label='val_loss')
    ax2.set_title('Training and Validation loss per epoch')
    ax2.set_xlabel('# Epoch')
    ax2.set_ylabel('loss')
    plt.legend()
    plt.tight_layout()
    plt.savefig(outrootname + '/' + '2' + output_name, format='png', dpi=800)
    # plt.show()
    del ax, ax2
    return model, w
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1
from callbacks import all_callbacks
from tensorflow.keras.layers import Activation, MaxPooling2D, Flatten
from qkeras.qlayers import QDense, QActivation
from qkeras.qconvolutional import QConv2D
from qkeras.quantizers import quantized_bits, quantized_relu

model = Sequential()
model.add(
    QConv2D(8, (4, 4),
            strides=(1, 1),
            input_shape=(32, 32, 1),
            kernel_quantizer=quantized_bits(14, 2),
            bias_quantizer=quantized_bits(14, 2),
            name="conv2d_0_m"))
model.add(QActivation(activation=quantized_relu(14, 2), name='relu1'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='max1'))
model.add(
    QConv2D(16, (2, 2),
            strides=(1, 1),
            kernel_quantizer=quantized_bits(14, 2),
            bias_quantizer=quantized_bits(14, 2),
            name="conv2d_1_m"))
model.add(QActivation(activation=quantized_relu(14, 2), name='relu2'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='max2'))
               name='dense'))
    if use_batchnorm:
        model.add(BatchNormalization(name='bn'))
    model.add(QActivation(activation=activation_quantizer))
    model.compile()
    return model, is_xnor, test_no


@pytest.fixture(scope='module')
def randX_100_10():
    return randX(100, 10)


@pytest.mark.parametrize(
    'test_no,N,kernel_quantizer,bias_quantizer,activation_quantizer,use_batchnorm,is_xnor',
    [(1, 10, ternary(alpha=1), quantized_bits(5, 2), 'binary_tanh', False, False),
     (2, 10, binary(), quantized_bits(5, 2), 'binary_tanh', False, True),
     (3, 10, ternary(alpha='auto'), quantized_bits(5, 2), binary(), True, True),
     (4, 10, ternary(alpha='auto'), quantized_bits(5, 2), 'ternary', True, False),
     (5, 10, ternary(alpha='auto'), quantized_bits(5, 2), ternary(threshold=0.2), True, False),
     (6, 10, ternary(alpha='auto'), quantized_bits(5, 2), ternary(threshold=0.8), True, False),
     (7, 10, binary(), quantized_bits(5, 2), binary(), False, True)])
def test_btnn(make_btnn, randX_100_10):
    model, is_xnor, test_no = make_btnn
    X = randX_100_10
    cfg = hls4ml.utils.config_from_keras_model(model, granularity='name')
    hls_model = hls4ml.converters.convert_from_keras_model(
def test_merge_layers():
    input_quantizers = [
        quantizers.quantized_bits(4, 0, 1),
        quantizers.quantized_bits(5, 0, 1),
        quantizers.quantized_bits(6, 0, 1)
    ]

    model = add_qmodel(quantizers.quantized_bits(4, 0, 1),
                       quantizers.quantized_bits(5, 0, 0),
                       quantizers.quantized_bits(6, 0, 1))
    dtype_dict = run(model, input_quantizers)
    merge_quantizer = dtype_dict["add"]["Add_quantizer"]
    assert merge_quantizer["quantizer_type"] == "quantized_bits"
    assert merge_quantizer["bits"] == 7
    assert merge_quantizer["int_bits"] == 2
    assert merge_quantizer["is_signed"] == 1

    model = multiply_qmodel()
    dtype_dict = run(model, input_quantizers)
    merge_quantizer = dtype_dict["multiply"]["Multiply_quantizer"]
    assert merge_quantizer["quantizer_type"] == "quantized_bits"
    assert merge_quantizer["bits"] == 13
    assert merge_quantizer["int_bits"] == 1
    assert merge_quantizer["is_signed"] == 1
    assert merge_quantizer["op_type"] == "mul"

    model = maximum_qmodel(quantizers.quantized_bits(4, 0, 1),
                           quantizers.quantized_bits(5, 0, 0),
                           quantizers.quantized_bits(6, 0, 1))
    dtype_dict = run(model, input_quantizers)
    merge_quantizer = dtype_dict["maximum"]["Maximum_quantizer"]
    assert merge_quantizer["quantizer_type"] == "quantized_bits"
    assert merge_quantizer["bits"] == 6
    assert merge_quantizer["int_bits"] == 1
    assert merge_quantizer["is_signed"] == 1

    model = concatenate_qmodel(quantizers.quantized_bits(4, 0, 1),
                               quantizers.quantized_bits(5, 0, 0),
                               quantizers.quantized_bits(6, 0, 1))
    dtype_dict = run(model, input_quantizers)
    merge_quantizer = dtype_dict["concatenate"]["Concatenate_quantizer"]
    assert merge_quantizer["quantizer_type"] == "quantized_bits"
    assert merge_quantizer["bits"] == 14
    assert merge_quantizer["int_bits"] == 4
    assert merge_quantizer["is_signed"] == 1
def test_qenergy():
    x = x_in = keras.layers.Input((784,), name="input")
    x = QDense(
        300,
        kernel_quantizer=quantizers.binary(),
        bias_quantizer=quantizers.binary(),
        name="d0")(x)
    x = QActivation("quantized_relu(4,0)", name="d0_qr4")(x)
    x = QDense(
        100,
        kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
        bias_quantizer=quantizers.quantized_bits(4, 0, 1),
        name="d1")(x)
    x = QAdaptiveActivation("quantized_relu", 4, name="d1_qr4")(x)
    x = QDense(
        10,
        kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
        bias_quantizer=quantizers.quantized_bits(4, 0, 1),
        name="d2")(x)
    x = keras.layers.Activation("softmax", name="softmax")(x)
    model = keras.Model(inputs=[x_in], outputs=[x])
    # print(model.summary())

    reference_internal = "int8"
    reference_accumulator = "int32"

    # get reference energy cost
    q = run_qtools.QTools(
        model,
        process="horowitz",
        source_quantizers=reference_internal,
        is_inference=False,
        weights_path=None,
        keras_quantizer=reference_internal,
        keras_accumulator=reference_accumulator,
        for_reference=True)
    ref_energy_dict = q.pe(
        weights_on_memory="sram",
        activations_on_memory="sram",
        min_sram_size=8 * 16 * 1024 * 1024,
        rd_wr_on_io=False)
    reference_size = q.extract_energy_sum(qtools_settings.cfg.include_energy,
                                          ref_energy_dict)

    # get trial energy cost
    q = run_qtools.QTools(
        model,
        process="horowitz",
        source_quantizers=reference_internal,
        is_inference=False,
        weights_path=None,
        keras_quantizer=reference_internal,
        keras_accumulator=reference_accumulator,
        for_reference=False)
    trial_energy_dict = q.pe(
        weights_on_memory="sram",
        activations_on_memory="sram",
        min_sram_size=8 * 16 * 1024 * 1024,
        rd_wr_on_io=False)
    trial_size = q.extract_energy_sum(qtools_settings.cfg.include_energy,
                                      trial_energy_dict)

    # Reference energy number is now updated with keras_accumulator as
    # output quantizer
    tmp = ref_energy_dict["d0"]["energy"]
    assert tmp["inputs"] == pytest.approx(372.77, abs=0.1)
    assert tmp["outputs"] == pytest.approx(570.57, abs=0.1)
    assert tmp["parameters"] == pytest.approx(111975.96, abs=0.1)
    assert tmp["op_cost"] == pytest.approx(70560.0, abs=0.1)

    tmp = ref_energy_dict["d1"]["energy"]
    assert tmp["inputs"] == pytest.approx(570.57, abs=0.1)
    assert tmp["outputs"] == pytest.approx(190.19, abs=0.1)
    assert tmp["parameters"] == pytest.approx(14313.66, abs=0.1)
    assert tmp["op_cost"] == pytest.approx(26500.0, abs=0.1)

    tmp = ref_energy_dict["d2"]["energy"]
    assert tmp["inputs"] == pytest.approx(190.19, abs=0.1)
    assert tmp["outputs"] == pytest.approx(19.02, abs=0.1)
    assert tmp["parameters"] == pytest.approx(483.08, abs=0.1)
    assert tmp["op_cost"] == pytest.approx(883.33, abs=0.1)

    # Trial
    tmp = trial_energy_dict["d0"]["energy"]
    assert tmp["inputs"] == pytest.approx(372.77, abs=0.1)
    assert tmp["outputs"] == pytest.approx(342.34, abs=0.1)
    assert tmp["parameters"] == pytest.approx(13997.95, abs=0.1)
    assert tmp["op_cost"] == pytest.approx(15729.0, abs=0.1)

    tmp = trial_energy_dict["d1"]["energy"]
    assert tmp["inputs"] == pytest.approx(72.27, abs=0.1)
    assert tmp["outputs"] == pytest.approx(110.31, abs=0.1)
    assert tmp["parameters"] == pytest.approx(7158.73, abs=0.1)
    assert tmp["op_cost"] == pytest.approx(3250.0, abs=0.1)

    tmp = trial_energy_dict["d2"]["energy"]
    assert tmp["inputs"] == pytest.approx(26.63, abs=0.1)
    assert tmp["outputs"] == pytest.approx(11.41, abs=0.1)
    assert tmp["parameters"] == pytest.approx(243.44, abs=0.1)
    assert tmp["op_cost"] == pytest.approx(102.08, abs=0.1)

    # print(ref_energy_dict)
    # print(trial_energy_dict)
    assert int(reference_size) == 226629
    assert int(trial_size) == 41070