def QDenseModel(weights_f, load_weights=False):
  """Construct QDenseModel."""
  x = x_in = Input((RESHAPED,), name="input")
  x = QActivation("quantized_relu(4)", name="act_i")(x)
  x = QDense(N_HIDDEN,
             kernel_quantizer=ternary(),
             bias_quantizer=quantized_bits(4, 0, 1),
             name="dense0")(x)
  x = QActivation("quantized_relu(2)", name="act0")(x)
  x = QDense(NB_CLASSES,
             kernel_quantizer=quantized_bits(4, 0, 1),
             bias_quantizer=quantized_bits(4, 0, 1),
             name="dense2")(x)
  x = Activation("softmax", name="softmax")(x)

  model = Model(inputs=[x_in], outputs=[x])
  model.summary()
  model.compile(loss="categorical_crossentropy",
                optimizer=OPTIMIZER,
                metrics=["accuracy"])

  if load_weights and weights_f:
    model.load_weights(weights_f)

  print_qstats(model)
  return model
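A minimal driver sketch for the model above. RESHAPED, N_HIDDEN, NB_CLASSES, and OPTIMIZER are module-level constants the function assumes; the values below are illustrative assumptions, not taken from the original source.

# Hypothetical constants for illustration only (assumptions).
RESHAPED = 28 * 28    # e.g. flattened 28x28 grayscale images
N_HIDDEN = 100        # hidden-layer width
NB_CLASSES = 10       # number of output classes
OPTIMIZER = "adam"    # any Keras optimizer spec works here

model = QDenseModel(weights_f=None)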
def _skip_connection(self, y, downsample, n_filters_in):
  """Implement skip connection."""
  # Deal with downsampling
  if downsample > 1:
    y = MaxPooling1D(downsample, strides=downsample, padding='same')(y)
  elif downsample == 1:
    y = y
  else:
    raise ValueError("Number of samples should always decrease.")
  # Deal with n_filters dimension increase
  if n_filters_in != self.n_filters_out:
    # This is one of the two alternatives presented in ResNet paper.
    # Other option is to just fill the matrix with zeros.
    y = QConv1D(self.n_filters_out, 1,
                padding='same',
                use_bias=False,
                kernel_initializer=self.kernel_initializer,
                kernel_quantizer=quantized_bits(bits=10, integer=2,
                                                symmetric=0, keep_negative=1),
                bias_quantizer=quantized_bits(bits=10, integer=2,
                                              symmetric=0, keep_negative=1))(y)
    y = QActivation(
        "quantized_bits(bits=13, integer=2, symmetric=0, keep_negative=1)")(y)
  return y
def QDenseModel(weights_f, load_weights=False):
  """Construct QDenseModel."""
  x = x_in = Input((28 * 28,), name="input")
  x = QActivation("quantized_relu(2)", name="act_i")(x)
  x = Dense(100, name="d0")(x)
  x = BatchNormalization(name="bn0")(x)
  x = QActivation("quantized_relu(2)", name="act0_m")(x)
  x = Flatten(name="flatten")(x)
  x = QDense(NB_CLASSES,
             kernel_quantizer=quantized_bits(4, 0, 1),
             bias_quantizer=quantized_bits(4, 0, 1),
             name="dense2")(x)
  x = Activation("softmax", name="softmax")(x)

  model = Model(inputs=[x_in], outputs=[x])
  model.summary()
  model.compile(loss="categorical_crossentropy",
                optimizer=OPTIMIZER,
                metrics=["accuracy"])

  if load_weights and weights_f:
    model.load_weights(weights_f)

  return model
def test_qbidirectional(rnn, all_weights_signature, expected_output):
  K.set_learning_phase(0)
  np.random.seed(22)
  tf.random.set_seed(22)

  x = x_in = Input((2, 4), name='input')
  x = QBidirectional(
      rnn(16,
          activation="quantized_po2(8)",
          kernel_quantizer="quantized_po2(8)",
          recurrent_quantizer="quantized_po2(8)",
          bias_quantizer="quantized_po2(8)",
          name='qbirnn_0'))(x)
  x = QDense(4,
             kernel_quantizer=quantized_bits(8, 2, 1, alpha=1.0),
             bias_quantizer=quantized_bits(8, 0, 1),
             name='dense')(x)
  x = Activation('softmax', name='softmax')(x)
  model = Model(inputs=[x_in], outputs=[x])

  # reload the model to ensure saving/loading works
  json_string = model.to_json()
  clear_session()
  model = quantized_model_from_json(json_string)

  # Save the model as an h5 file using Keras's model.save()
  fd, fname = tempfile.mkstemp('.h5')
  model.save(fname)
  del model  # Delete the existing model

  # Return a compiled model identical to the previous one
  model = load_qmodel(fname)

  # Clean the created h5 file after loading the model
  os.close(fd)
  os.remove(fname)

  # apply quantizer to weights
  model_save_quantized_weights(model)

  all_weights = []
  for layer in model.layers:
    for i, weights in enumerate(layer.get_weights()):
      w = np.sum(weights)
      all_weights.append(w)
  all_weights = np.array(all_weights)

  assert all_weights.size == all_weights_signature.size
  assert np.all(all_weights == all_weights_signature)

  # test forward:
  inputs = 2 * np.random.rand(10, 2, 4)
  actual_output = model.predict(inputs).astype(np.float16)
  assert_allclose(actual_output, expected_output, rtol=1e-4)
def test_dense():
  data = getData()
  nBits = 8
  nBitsInt = 4
  qbits_param_input = qkr.quantized_bits(bits=nBits, integer=nBitsInt,
                                         keep_negative=0)
  qbits_param = qkr.quantized_bits(bits=nBits, integer=nBitsInt,
                                   keep_negative=1)

  # simple model only quantizes
  inputs = Input(shape=(4, 4, 3))
  x = inputs
  x = Flatten(name="flatten")(x)
  x = QActivation(qbits_param_input, name='q_decoder_output')(x)
  encodedLayer = QDense(10, activation='relu', name='encoded_vector',
                        kernel_quantizer=qbits_param,
                        bias_quantizer=qbits_param)(x)
  model = Model(inputs, encodedLayer, name='encoder')
  model.summary()
  model.compile(loss='mse', optimizer='adam')

  val_input, train_input = split(data, 0.5)
  train_output = np.ones(50).reshape(5, 10)  # garbage outputs for training
  val_output = np.ones(50).reshape(5, 10)  # garbage outputs for validation

  es = kr.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                                  patience=3)
  history = model.fit(train_input, train_output,
                      epochs=1, batch_size=500, shuffle=True,
                      validation_data=(val_input, val_output),
                      callbacks=[es])

  val_output = model.predict(val_input)
  print('\nTEST DENSE')
  print('\nRaw validation output: \n', val_output)
  print('\nMultiplied by 2^(decimal bits): \n'
        ' Results should be integers * weight precision... \n',
        val_output * (2**(nBits - nBitsInt)))
  return
def build_model(input_shape):
  x = x_in = Input(shape=input_shape, name="input")
  x = QConv2D(32, (2, 2), strides=(2, 2),
              kernel_quantizer=quantized_bits(4, 0, 1),
              bias_quantizer=quantized_bits(4, 0, 1),
              name="conv2d_0_m")(x)
  x = QActivation("quantized_relu(4,0)", name="act0_m")(x)
  x = QConv2D(64, (3, 3), strides=(2, 2),
              kernel_quantizer=quantized_bits(4, 0, 1),
              bias_quantizer=quantized_bits(4, 0, 1),
              name="conv2d_1_m")(x)
  x = QActivation("quantized_relu(4,0)", name="act1_m")(x)
  x = QConv2D(64, (2, 2), strides=(2, 2),
              kernel_quantizer=quantized_bits(4, 0, 1),
              bias_quantizer=quantized_bits(4, 0, 1),
              name="conv2d_2_m")(x)
  x = QActivation("quantized_relu(4,0)", name="act2_m")(x)
  x = Flatten()(x)
  x = QDense(num_classes,
             kernel_quantizer=quantized_bits(4, 0, 1),
             bias_quantizer=quantized_bits(4, 0, 1),
             name="dense")(x)
  x = Activation("softmax", name="softmax")(x)
  model = Model(inputs=[x_in], outputs=[x])
  return model
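A usage sketch for build_model. The function assumes a module-level num_classes constant; the input shape and value below are illustrative assumptions only.

# Hypothetical values for illustration -- not from the original source.
num_classes = 10
model = build_model((28, 28, 1))
model.compile(loss="categorical_crossentropy", optimizer="adam",
              metrics=["accuracy"])
model.summary()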
def test_quantized_bits(bits, integer, symmetric, keep_negative, test_values,
                        expected_values):
  x = K.placeholder(ndim=2)
  f = K.function(
      [x], [quantized_bits(bits, integer, symmetric, keep_negative)(x)])
  result = f([test_values])[0]
  assert_allclose(result, expected_values, rtol=1e-05)
def GetQbits(self, inp, keep_negative=1):
  print("Setting bits {} {} with keep negative = {}".format(
      inp['total'], inp['integer'], keep_negative))
  b = qkr.quantized_bits(bits=inp['total'],
                         integer=inp['integer'],
                         keep_negative=keep_negative,
                         alpha=1)
  print('max = %s, min = %s' % (b.max(), b.min()))
  print('str representation: %s' % str(b))
  print('config = ', b.get_config())
  return b
def quantized_cnn(Inputs, nclasses, filters, kernel, strides, pooling, dropout,
                  activation="quantized_relu(32,16)",
                  quantizer_cnn=quantized_bits(1),
                  quantizer_dense=quantized_bits(1)):
  length = len(filters)
  if any(len(lst) != length
         for lst in [filters, kernel, strides, pooling, dropout]):
    sys.exit(
        "One value for stride and kernel must be added for each filter! "
        "Exiting")

  x = x_in = Inputs
  for i, (f, k, s, p, d) in enumerate(
      zip(filters, kernel, strides, pooling, dropout)):
    print("Adding layer with {} filters, kernel_size=({},{}), "
          "strides=({},{})".format(f, k, k, s, s))
    x = QConv2D(int(f),
                kernel_size=(int(k), int(k)),
                strides=(int(s), int(s)),
                kernel_quantizer=quantizer_cnn,
                bias_quantizer=quantizer_cnn,
                name='conv_%i' % i)(x)
    x = QActivation(activation)(x)
    x = BatchNormalization()(x)
    if float(p) != 0:
      x = MaxPooling2D(pool_size=(int(p), int(p)))(x)
    # x = Dropout(float(d))(x)

  x = Flatten()(x)
  x = QDense(128,
             kernel_quantizer=quantizer_dense,
             bias_quantizer=quantizer_dense)(x)
  x = QActivation(activation)(x)
  x = BatchNormalization()(x)
  x = Dense(nclasses)(x)
  model = Model(inputs=[x_in], outputs=[x])
  return model
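A usage sketch for quantized_cnn. The input shape, class count, and per-layer hyper-parameter lists below are illustrative assumptions, not values from the original source; note the lists must all have the same length.

# Hypothetical configuration: one entry per conv layer (assumptions).
inputs = Input(shape=(32, 32, 3))
model = quantized_cnn(inputs, nclasses=10,
                      filters=[16, 32], kernel=[3, 3], strides=[1, 1],
                      pooling=[2, 2], dropout=[0.1, 0.1],
                      quantizer_cnn=quantized_bits(4, 0, 1),
                      quantizer_dense=quantized_bits(4, 0, 1))
model.summary()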
def build_baseline(image_size=16, nclasses=5, filters=[8, 8, 16]):
  # Note: shape must be a tuple; the original passed a bare int `(16)`.
  inputs = tf.keras.Input((16,), name="Input")
  x = QDense(64,
             kernel_quantizer=quantized_bits(4, 0, 1),
             bias_quantizer=quantized_bits(4, 0, 1),
             name="qdense_1")(inputs)
  x = QActivation('quantized_relu(4,2)', name="qact_1")(x)
  x = QDense(32,
             kernel_quantizer='ternary',
             bias_quantizer='ternary',
             name="qdense_2")(x)
  x = QActivation('quantized_relu(3,1)', name="qact_2")(x)
  x = QDense(32,
             kernel_quantizer=quantized_bits(2, 1, 1),
             bias_quantizer=quantized_bits(2, 1, 1),
             name="qdense_3")(x)
  x = QActivation('quantized_relu(4,2)', name="qact_3")(x)
  x = QDense(5,
             kernel_quantizer='stochastic_binary',
             bias_quantizer=quantized_bits(8, 3, 1),
             name="qdense_nclasses")(x)
  predictions = tf.keras.layers.Activation('softmax', name="softmax")(x)
  model = tf.keras.Model(inputs, predictions, name='baseline')
  return model
def qkeras_cnn(name_, Inputs, nclasses, filters, kernel, strides, pooling,
               dropout, activation, pruning_params={},
               qb=quantized_bits(6, 0, alpha=1)):
  length = len(filters)
  if any(len(lst) != length
         for lst in [filters, kernel, strides, pooling, dropout]):
    sys.exit(
        "One value for stride and kernel must be added for each filter! "
        "Exiting")

  x = x_in = Inputs
  x = BatchNormalization()(x)
  x = ZeroPadding2D(padding=(1, 1), data_format="channels_last")(x)
  for i, (f, k, s, p, d) in enumerate(
      zip(filters, kernel, strides, pooling, dropout)):
    print("Adding layer with {} filters, kernel_size=({},{}), "
          "strides=({},{})".format(f, k, k, s, s))
    x = QConv2D(int(f),
                kernel_size=(int(k), int(k)),
                strides=(int(s), int(s)),
                kernel_quantizer=qb,
                bias_quantizer=qb,
                kernel_initializer='lecun_uniform',
                kernel_regularizer=l1(0.0001),
                use_bias=False,
                name='conv_%i' % i)(x)
    if float(p) != 0:
      x = MaxPooling2D(pool_size=(int(p), int(p)))(x)
    x = BatchNormalization()(x)
    x = Activation(activation, name='conv_act_%i' % i)(x)

  x = Flatten()(x)
  x = QDense(128,
             kernel_quantizer=qb,
             bias_quantizer=qb,
             kernel_initializer='lecun_uniform',
             kernel_regularizer=l1(0.0001),
             name='dense_1',
             use_bias=False)(x)
  x = Dropout(0.25)(x)
  x = BatchNormalization()(x)
  x = Activation(activation, name='dense_act')(x)
  x_out = Dense(nclasses, activation='softmax', name='output')(x)
  model = Model(inputs=[x_in], outputs=[x_out], name=name_)
  return model
def test_qconv1d():
  np.random.seed(33)
  x = Input((4, 4,))
  y = QConv1D(2, 1,
              kernel_quantizer=quantized_bits(6, 2, 1),
              bias_quantizer=quantized_bits(4, 0, 1),
              name='qconv1d')(x)
  model = Model(inputs=x, outputs=y)

  for layer in model.layers:
    all_weights = []
    for i, weights in enumerate(layer.get_weights()):
      input_size = np.prod(layer.input.shape.as_list()[1:])
      if input_size is None:
        input_size = 10 * 10
      shape = weights.shape
      assert input_size > 0, 'input size for {} {}'.format(layer.name, i)
      all_weights.append(
          10.0 * np.random.normal(0.0, np.sqrt(2.0 / input_size), shape))
    if all_weights:
      layer.set_weights(all_weights)

  # apply quantizer to weights
  model_save_quantized_weights(model)

  inputs = np.random.rand(2, 4, 4)
  p = model.predict(inputs).astype(np.float16)

  y = np.array([[[0.1309, -1.229], [-0.4165, -2.639], [-0.08105, -2.299],
                 [1.981, -2.195]],
                [[-0.3174, -3.94], [-0.3352, -2.316], [0.105, -0.833],
                 [0.2115, -2.89]]]).astype(np.float16)
  assert np.all(p == y)
def getModel(modelName, yamlConfig, input_shape):
  Filters = yamlConfig['Filters'].split(",")
  Kernel = yamlConfig['Kernel'].split(",")
  Strides = yamlConfig['Strides'].split(",")
  Pooling = yamlConfig['Pooling'].split(",")
  Dropout = yamlConfig['Dropout'].split(",")
  Activation = yamlConfig['Activation']

  # with strategy.scope():
  model = getattr(models, modelName)
  if 'quantized' in modelName:
    quantizer_conv = quantized_bits(int(yamlConfig['cnn_bits']),
                                    int(yamlConfig['cnn_integers']), 1)
    quantizer_dense = quantized_bits(int(yamlConfig['dense_bits']),
                                     int(yamlConfig['dense_integers']), 1)
    model = model(Input(input_shape), NCLASSES, Filters, Kernel, Strides,
                  Pooling, Dropout, Activation, quantizer_dense,
                  quantizer_conv)
  else:
    model = model(Input(input_shape), NCLASSES, Filters, Kernel, Strides,
                  Pooling, Dropout, Activation)
  return model
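The yamlConfig mapping this function reads is not shown in the source; the sketch below is a hedged guess at its shape, with made-up values, assuming a `models` module that exposes the builders by name.

# Hypothetical yamlConfig contents (assumptions for illustration only).
yamlConfig = {
    'Filters': '8,16,32', 'Kernel': '3,3,3', 'Strides': '1,1,1',
    'Pooling': '2,2,0', 'Dropout': '0.1,0.1,0.1', 'Activation': 'relu',
    'cnn_bits': '6', 'cnn_integers': '0',
    'dense_bits': '6', 'dense_integers': '0',
}
model = getModel('quantized_cnn', yamlConfig, (32, 32, 3))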
def test_inputs():
  data = getData()
  nBits = 8
  nBitsInt = 4
  qbits_param_input = qkr.quantized_bits(bits=nBits, integer=nBitsInt,
                                         keep_negative=0)

  # simple model only quantizes
  inputs = Input(shape=(4, 4, 3))
  x = inputs
  x = Flatten(name="flatten")(x)
  x = QActivation(qbits_param_input, name='q_decoder_output')(x)
  model = Model(inputs, x, name='encoder')
  model.summary()
  model.compile(loss='mse', optimizer='adam')

  val_input, train_input = split(data, 0.5)
  train_output = np.ones(240).reshape(5, 48)  # garbage outputs for training
  val_output = np.ones(240).reshape(5, 48)  # garbage outputs for validation

  es = kr.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                                  patience=3)
  history = model.fit(train_input, train_output,
                      epochs=1, batch_size=500, shuffle=True,
                      validation_data=(val_input, val_output),
                      callbacks=[es])

  val_output = model.predict(val_input)
  print('\nTEST INPUTS')
  print('\nRaw validation output: \n', val_output)
  print('\nMultiplied by 2^(decimal bits): \n',
        val_output * (2**(nBits - nBitsInt)))
  return
def test_sequential_qnetwork():
  model = tf.keras.Sequential()
  model.add(Input((28, 28, 1), name='input'))
  model.add(
      QConv2D(32, (2, 2), strides=(2, 2),
              kernel_quantizer=quantized_bits(4, 0, 1),
              bias_quantizer=quantized_bits(4, 0, 1),
              name='conv2d_0_m'))
  model.add(QActivation(quantized_relu(4, 0), name='act0_m'))
  model.add(
      QConv2D(64, (3, 3), strides=(2, 2),
              kernel_quantizer=quantized_bits(4, 0, 1),
              bias_quantizer=quantized_bits(4, 0, 1),
              name='conv2d_1_m'))
  model.add(QActivation(quantized_relu(4, 0), name='act1_m'))
  model.add(
      QConv2D(64, (2, 2), strides=(2, 2),
              kernel_quantizer=quantized_bits(4, 0, 1),
              bias_quantizer=quantized_bits(4, 0, 1),
              name='conv2d_2_m'))
  model.add(QActivation(quantized_relu(4, 0), name='act2_m'))
  model.add(Flatten())
  model.add(
      QDense(10,
             kernel_quantizer=quantized_bits(4, 0, 1),
             bias_quantizer=quantized_bits(4, 0, 1),
             name='dense'))
  model.add(Activation('softmax', name='softmax'))

  # Check that all model operations were found correctly
  model_ops = extract_model_operations(model)
  for layer in model_ops.keys():
    assert model_ops[layer]['type'][0] != 'null'
  return model
def build_layerwise_model(input_shape, **pruning_params):
  return Sequential([
      prune.prune_low_magnitude(
          QConv2D(32, (2, 2), strides=(2, 2),
                  kernel_quantizer=quantized_bits(4, 0, 1),
                  bias_quantizer=quantized_bits(4, 0, 1),
                  name="conv2d_0_m"),
          input_shape=input_shape,
          **pruning_params),
      QActivation("quantized_relu(4,0)", name="act0_m"),
      prune.prune_low_magnitude(
          QConv2D(64, (3, 3), strides=(2, 2),
                  kernel_quantizer=quantized_bits(4, 0, 1),
                  bias_quantizer=quantized_bits(4, 0, 1),
                  name="conv2d_1_m"),
          **pruning_params),
      QActivation("quantized_relu(4,0)", name="act1_m"),
      prune.prune_low_magnitude(
          QConv2D(64, (2, 2), strides=(2, 2),
                  kernel_quantizer=quantized_bits(4, 0, 1),
                  bias_quantizer=quantized_bits(4, 0, 1),
                  name="conv2d_2_m"),
          **pruning_params),
      QActivation("quantized_relu(4,0)", name="act2_m"),
      Flatten(),
      prune.prune_low_magnitude(
          QDense(num_classes,
                 kernel_quantizer=quantized_bits(4, 0, 1),
                 bias_quantizer=quantized_bits(4, 0, 1),
                 name="dense"),
          **pruning_params),
      Activation("softmax", name="softmax")
  ])
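A sketch of the pruning_params this builder expects, using the tensorflow_model_optimization pruning schedule that pairs with the `prune.prune_low_magnitude` wrapper above; the sparsity target and step values are assumptions.

# Hypothetical pruning configuration (assumptions for illustration).
from tensorflow_model_optimization.python.core.sparsity.keras import (
    pruning_schedule)

pruning_params = {
    "pruning_schedule": pruning_schedule.ConstantSparsity(
        target_sparsity=0.75, begin_step=2000, frequency=100)
}
model = build_layerwise_model((28, 28, 1), **pruning_params)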
# ----- Model ----- #
kernel_size = 16
kernel_initializer = 'he_normal'
signal = Input(shape=(4096, 12), dtype=np.float32, name='signal')
age_range = Input(shape=(6,), dtype=np.float32, name='age_range')
is_male = Input(shape=(1,), dtype=np.float32, name='is_male')
x = signal
x = QActivation(
    "quantized_bits(bits=13, integer=2, symmetric=0, keep_negative=1)")(x)
x = QConv1D(64, kernel_size,
            padding='same',
            use_bias=False,
            kernel_initializer=kernel_initializer,
            kernel_quantizer=quantized_bits(bits=10, integer=2, symmetric=0,
                                            keep_negative=1),
            bias_quantizer=quantized_bits(bits=10, integer=2, symmetric=0,
                                          keep_negative=1))(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = QActivation(
    "quantized_bits(bits=13, integer=2, symmetric=0, keep_negative=1)")(x)
x, y = ResidualUnit(512, 128, kernel_size=kernel_size,
                    kernel_initializer=kernel_initializer)([x, x])
x, y = ResidualUnit(128, 196,
def main():
  # check the mean value of samples from stochastic_rounding for po2
  np.random.seed(42)
  count = 100000
  val = 42
  a = K.constant([val] * count)
  b = quantized_po2(use_stochastic_rounding=True)(a)
  res = np.sum(K.eval(b)) / count
  print(res, "should be close to ", val)

  b = quantized_relu_po2(use_stochastic_rounding=True)(a)
  res = np.sum(K.eval(b)) / count
  print(res, "should be close to ", val)

  a = K.constant([-1] * count)
  b = quantized_relu_po2(use_stochastic_rounding=True)(a)
  res = np.sum(K.eval(b)) / count
  print(res, "should be all ", 0)

  # non-stochastic rounding quantizer.
  a = K.constant([-3.0, -2.0, -1.0, -0.5, 0.0, 0.5, 1.0, 2.0, 3.0])
  a = K.constant([0.194336])
  print(" a =", K.eval(a).astype(np.float16))
  print("qa =", K.eval(quantized_relu(6, 2)(a)).astype(np.float16))
  print("ss =", K.eval(smooth_sigmoid(a)).astype(np.float16))
  print("hs =", K.eval(hard_sigmoid(a)).astype(np.float16))
  print("ht =", K.eval(hard_tanh(a)).astype(np.float16))
  print("st =", K.eval(smooth_tanh(a)).astype(np.float16))

  c = K.constant(np.arange(-1.5, 1.51, 0.3))
  print(" c =", K.eval(c).astype(np.float16))
  print("qb_111 =", K.eval(quantized_bits(1, 1, 1)(c)).astype(np.float16))
  print("qb_210 =", K.eval(quantized_bits(2, 1, 0)(c)).astype(np.float16))
  print("qb_211 =", K.eval(quantized_bits(2, 1, 1)(c)).astype(np.float16))
  print("qb_300 =", K.eval(quantized_bits(3, 0, 0)(c)).astype(np.float16))
  print("qb_301 =", K.eval(quantized_bits(3, 0, 1)(c)).astype(np.float16))

  c_1000 = K.constant(np.array([list(K.eval(c))] * 1000))
  b = np.sum(K.eval(bernoulli()(c_1000)).astype(np.int32), axis=0) / 1000.0
  print(" hs =", K.eval(hard_sigmoid(c)).astype(np.float16))
  print(" b_all =", b.astype(np.float16))

  T = 0.0
  t = K.eval(stochastic_ternary(alpha="auto")(c_1000))
  for i in range(10):
    print("stochastic_ternary({}) =".format(i), t[i])
  print(" st_all =",
        np.round(
            np.sum(t.astype(np.float32), axis=0).astype(np.float16) / 1000.0,
            2).astype(np.float16))
  print(" ternary =", K.eval(ternary(threshold=0.5)(c)).astype(np.int32))

  c = K.constant(np.arange(-1.5, 1.51, 0.3))
  print(" c =", K.eval(c).astype(np.float16))
  print(" b_10 =", K.eval(binary(1)(c)).astype(np.float16))
  print("qr_10 =", K.eval(quantized_relu(1, 0)(c)).astype(np.float16))
  print("qr_11 =", K.eval(quantized_relu(1, 1)(c)).astype(np.float16))
  print("qr_20 =", K.eval(quantized_relu(2, 0)(c)).astype(np.float16))
  print("qr_21 =", K.eval(quantized_relu(2, 1)(c)).astype(np.float16))
  print("qr_101 =", K.eval(quantized_relu(1, 0, 1)(c)).astype(np.float16))
  print("qr_111 =", K.eval(quantized_relu(1, 1, 1)(c)).astype(np.float16))
  print("qr_201 =", K.eval(quantized_relu(2, 0, 1)(c)).astype(np.float16))
  print("qr_211 =", K.eval(quantized_relu(2, 1, 1)(c)).astype(np.float16))
  print("qt_200 =", K.eval(quantized_tanh(2, 0)(c)).astype(np.float16))
  print("qt_210 =", K.eval(quantized_tanh(2, 1)(c)).astype(np.float16))
  print("qt_201 =", K.eval(quantized_tanh(2, 0, 1)(c)).astype(np.float16))
  print("qt_211 =", K.eval(quantized_tanh(2, 1, 1)(c)).astype(np.float16))

  set_internal_sigmoid("smooth")
  print("with smooth sigmoid")
  print("qr_101 =", K.eval(quantized_relu(1, 0, 1)(c)).astype(np.float16))
  print("qr_111 =", K.eval(quantized_relu(1, 1, 1)(c)).astype(np.float16))
  print("qr_201 =", K.eval(quantized_relu(2, 0, 1)(c)).astype(np.float16))
  print("qr_211 =", K.eval(quantized_relu(2, 1, 1)(c)).astype(np.float16))
  print("qt_200 =", K.eval(quantized_tanh(2, 0)(c)).astype(np.float16))
  print("qt_210 =", K.eval(quantized_tanh(2, 1)(c)).astype(np.float16))
  print("qt_201 =", K.eval(quantized_tanh(2, 0, 1)(c)).astype(np.float16))
  print("qt_211 =", K.eval(quantized_tanh(2, 1, 1)(c)).astype(np.float16))

  set_internal_sigmoid("real")
  print("with real sigmoid")
  print("qr_101 =", K.eval(quantized_relu(1, 0, 1)(c)).astype(np.float16))
  print("qr_111 =", K.eval(quantized_relu(1, 1, 1)(c)).astype(np.float16))
  print("qr_201 =", K.eval(quantized_relu(2, 0, 1)(c)).astype(np.float16))
  print("qr_211 =", K.eval(quantized_relu(2, 1, 1)(c)).astype(np.float16))
  print("qt_200 =", K.eval(quantized_tanh(2, 0)(c)).astype(np.float16))
  print("qt_210 =", K.eval(quantized_tanh(2, 1)(c)).astype(np.float16))
  print("qt_201 =", K.eval(quantized_tanh(2, 0, 1)(c)).astype(np.float16))
  print("qt_211 =", K.eval(quantized_tanh(2, 1, 1)(c)).astype(np.float16))
  set_internal_sigmoid("hard")

  print(" c =", K.eval(c).astype(np.float16))
  print("q2_31 =", K.eval(quantized_po2(3, 1)(c)).astype(np.float16))
  print("q2_32 =", K.eval(quantized_po2(3, 2)(c)).astype(np.float16))
  print("qr2_21 =", K.eval(quantized_relu_po2(2, 1)(c)).astype(np.float16))
  print("qr2_22 =", K.eval(quantized_relu_po2(2, 2)(c)).astype(np.float16))
  print("qr2_44 =", K.eval(quantized_relu_po2(4, 1)(c)).astype(np.float16))

  # stochastic rounding
  c = K.constant(np.arange(-1.5, 1.51, 0.3))
  print("q2_32_2 =", K.eval(quantized_relu_po2(32, 2)(c)).astype(np.float16))

  b = K.eval(stochastic_binary()(c_1000)).astype(np.int32)
  for i in range(5):
    print("sbinary({}) =".format(i), b[i])
  print("sbinary =",
        np.round(np.sum(b, axis=0) / 1000.0, 2).astype(np.float16))
  print(" binary =", K.eval(binary()(c)).astype(np.int32))
  print(" c =", K.eval(c).astype(np.float16))

  for i in range(10):
    print(" s_bin({}) =".format(i),
          K.eval(binary(use_stochastic_rounding=1)(c)).astype(np.int32))
  for i in range(10):
    print(" s_po2({}) =".format(i),
          K.eval(quantized_po2(use_stochastic_rounding=1)(c)).astype(np.int32))
  for i in range(10):
    print(" s_relu_po2({}) =".format(i),
          K.eval(quantized_relu_po2(use_stochastic_rounding=1)(c)).astype(
              np.int32))
def test_qconv1d(layer_cls):
  np.random.seed(33)
  if layer_cls == "QConv1D":
    x = Input((4, 4,))
    y = QConv1D(2, 1,
                kernel_quantizer=quantized_bits(6, 2, 1, alpha=1.0),
                bias_quantizer=quantized_bits(4, 0, 1),
                name='qconv1d')(x)
    model = Model(inputs=x, outputs=y)
  else:
    x = Input((4, 4,))
    y = QSeparableConv1D(2, 2,
                         depthwise_quantizer=quantized_bits(6, 2, 1,
                                                            alpha=1.0),
                         pointwise_quantizer=quantized_bits(4, 0, 1,
                                                            alpha=1.0),
                         bias_quantizer=quantized_bits(4, 0, 1),
                         name='qconv1d')(x)
    model = Model(inputs=x, outputs=y)

  # Extract model operations
  model_ops = extract_model_operations(model)

  # Check the input layer model operation was found correctly
  assert model_ops['qconv1d']['type'][0] != 'null'

  # Assertion about the number of operations for this (Separable)Conv1D layer
  if layer_cls == "QConv1D":
    assert model_ops['qconv1d']['number_of_operations'] == 32
  else:
    assert model_ops['qconv1d']['number_of_operations'] == 30

  # Print qstats to make sure it works with Conv1D layer
  print_qstats(model)

  # reload the model to ensure saving/loading works
  # json_string = model.to_json()
  # clear_session()
  # model = quantized_model_from_json(json_string)

  for layer in model.layers:
    all_weights = []
    for i, weights in enumerate(layer.get_weights()):
      input_size = np.prod(layer.input.shape.as_list()[1:])
      if input_size is None:
        input_size = 10 * 10
      shape = weights.shape
      assert input_size > 0, 'input size for {} {}'.format(layer.name, i)
      all_weights.append(
          10.0 * np.random.normal(0.0, np.sqrt(2.0 / input_size), shape))
    if all_weights:
      layer.set_weights(all_weights)

  # Save the model as an h5 file using Keras's model.save()
  fd, fname = tempfile.mkstemp('.h5')
  model.save(fname)
  del model  # Delete the existing model

  # Return a compiled model identical to the previous one
  model = load_qmodel(fname)

  # Clean the created h5 file after loading the model
  os.close(fd)
  os.remove(fname)

  # apply quantizer to weights
  model_save_quantized_weights(model)

  inputs = np.random.rand(2, 4, 4)
  p = model.predict(inputs).astype(np.float16)
  if layer_cls == "QConv1D":
    y = np.array([[[-2.441, 3.816], [-3.807, -1.426], [-2.684, -1.317],
                   [-1.659, 0.9834]],
                  [[-4.99, 1.139], [-2.559, -1.216], [-2.285, 1.905],
                   [-2.652, -0.467]]]).astype(np.float16)
  else:
    y = np.array([[[-2.275, -3.178], [-0.4358, -3.262], [1.987, 0.3987]],
                  [[-0.01251, -0.376], [0.3928, -1.328],
                   [-1.243, -2.43]]]).astype(np.float16)
  assert_allclose(p, y, rtol=1e-4)
# a_reg = keras.regularizers.l2(1e-1)
k_reg = None
a_reg = None
# constraint = keras.constraints.min_max_norm(0, 1)
num_nodes_h = 32

# In[20]:

# QDense model
# When using the concatenated dataset, add one more input with shape (5,).
inputs = Input(shape=(4,), name='inputs_0')
# i = QActivation("quantized_relu(8,8)", name="act_i")(inputs)

# hidden 1
hidden_layer = QDense(num_nodes_h,
                      kernel_quantizer=quantized_bits(bits, integer),
                      bias_quantizer=quantized_bits(bits, integer),
                      kernel_regularizer=k_reg,
                      activity_regularizer=a_reg,
                      name="dense_2")(inputs)
hidden_layer = QBatchNormalization(name='bn_2')(hidden_layer)
hidden_layer = QActivation("quantized_relu(16,8)", name="relu_2")(hidden_layer)

# hidden 2
hidden_layer = QDense(num_nodes_h,
                      kernel_quantizer=quantized_bits(bits, integer),
                      bias_quantizer=quantized_bits(bits, integer),
                      kernel_regularizer=k_reg,
                      activity_regularizer=a_reg,
                      name="dense_3")(hidden_layer)
hidden_layer = QBatchNormalization(name='bn_3')(hidden_layer)
hidden_layer = QActivation("quantized_relu(16,8)", name="relu_3")(hidden_layer)
# Per-layer parameter/activation sizes. The loop header and accumulator
# initialization below are restored from context (the snippet starts
# mid-loop in the source).
total_size_params = 0
total_size_acts = 0
for layer in model.layers:
  layer_name = layer.__class__.__name__
  parameters = aq._param_size(layer)
  activations = aq._act_size(layer)
  print("Parameters {}: {}".format(layer.name, parameters))
  print("Activations {}: {}".format(layer.name, activations))
  total_size_params += parameters
  total_size_acts += activations

total_size, p_size, a_size, model_size_dict = aq.compute_model_size(model)

flops = get_flops(model, batch_size=1)
print(f"FLOPS: {flops / 10 ** 9:.03} G")

q = run_qtools.QTools(model,
                      process="horowitz",
                      source_quantizers=[quantized_bits(16, 6, 1)],
                      is_inference=False,
                      weights_path=None,
                      keras_quantizer="fp32",
                      keras_accumulator="fp32",
                      for_reference=False)
q.qtools_stats_print()

# calculate energy of the derived data type map.
energy_dict = q.pe(
    # whether to store parameters in dram, sram, or fixed
    weights_on_memory="sram",
    # store activations in dram or sram
    activations_on_memory="sram",
    # minimum sram size in number of bits. Let's assume a 16MB SRAM.
    min_sram_size=8 * 16 * 1024 * 1024,
    # whether to load data from dram to sram (consider sram as a cache
    # for dram). If False, we will assume data will already be in SRAM.
    rd_wr_on_io=False)

# get stats of energy distribution in each layer
def test_qnetwork():
  x = x_in = Input((28, 28, 1), name='input')
  x = QSeparableConv2D(32, (2, 2), strides=(2, 2),
                       depthwise_quantizer="binary",
                       pointwise_quantizer=quantized_bits(4, 0, 1),
                       depthwise_activation=quantized_bits(6, 2, 1),
                       bias_quantizer=quantized_bits(4, 0, 1),
                       name='conv2d_0_m')(x)
  x = QActivation('quantized_relu(6,2,1)', name='act0_m')(x)
  x = QConv2D(64, (3, 3), strides=(2, 2),
              kernel_quantizer="ternary",
              bias_quantizer=quantized_bits(4, 0, 1),
              name='conv2d_1_m',
              activation=quantized_relu(6, 3, 1))(x)
  x = QConv2D(64, (2, 2), strides=(2, 2),
              kernel_quantizer=quantized_bits(6, 2, 1),
              bias_quantizer=quantized_bits(4, 0, 1),
              name='conv2d_2_m')(x)
  x = QActivation('quantized_relu(6,4,1)', name='act2_m')(x)
  x = Flatten(name='flatten')(x)
  x = QDense(10,
             kernel_quantizer=quantized_bits(6, 2, 1),
             bias_quantizer=quantized_bits(4, 0, 1),
             name='dense')(x)
  x = Activation('softmax', name='softmax')(x)
  model = Model(inputs=[x_in], outputs=[x])

  # reload the model to ensure saving/loading works
  json_string = model.to_json()
  clear_session()
  model = quantized_model_from_json(json_string)

  # generate same output for weights
  np.random.seed(42)
  for layer in model.layers:
    all_weights = []
    for i, weights in enumerate(layer.get_weights()):
      input_size = np.prod(layer.input.shape.as_list()[1:])
      if input_size is None:
        input_size = 576 * 10  # to avoid learning sizes
      shape = weights.shape
      assert input_size > 0, 'input size for {} {}'.format(layer.name, i)
      # he normal initialization with a scale factor of 2.0
      all_weights.append(
          10.0 * np.random.normal(0.0, np.sqrt(2.0 / input_size), shape))
    if all_weights:
      layer.set_weights(all_weights)

  # apply quantizer to weights
  model_save_quantized_weights(model)

  all_weights = []
  for layer in model.layers:
    for i, weights in enumerate(layer.get_weights()):
      w = np.sum(weights)
      all_weights.append(w)
  all_weights = np.array(all_weights)

  # test_qnetwork_weight_quantization
  all_weights_signature = np.array(
      [2., -6.75, -0.625, -2., -0.25, -56., 1.125, -1.625, -1.125])
  assert all_weights.size == all_weights_signature.size
  assert np.all(all_weights == all_weights_signature)

  # test_qnetwork_forward:
  expected_output = np.array(
      [[0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00,
        0.e+00, 0.e+00],
       [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00,
        0.e+00, 0.e+00],
       [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00,
        6.e-08, 1.e+00],
       [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00,
        0.e+00, 0.e+00],
       [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00,
        0.e+00, 0.e+00],
       [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00,
        5.e-07, 1.e+00],
       [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00,
        0.e+00, 0.e+00],
       [0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00,
        0.e+00, 0.e+00],
       [0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00,
        0.e+00, 0.e+00],
       [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00,
        0.e+00, 0.e+00]]).astype(np.float16)
  inputs = 2 * np.random.rand(10, 28, 28, 1)
  actual_output = model.predict(inputs).astype(np.float16)
  assert_allclose(actual_output, expected_output, rtol=1e-4)
def GetQbits(self, inp, keep_negative=1):
  print("Setting bits {} {} with keep negative = {}".format(
      inp['total'], inp['integer'], keep_negative))
  return qkr.quantized_bits(bits=inp['total'],
                            integer=inp['integer'],
                            keep_negative=keep_negative)
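A hedged usage note for GetQbits: the owning helper class is not shown in the source, so the instance name and dict values below are assumptions for illustration.

# Hypothetical call (assumes `helper` is an instance of the owning class):
# qbits = helper.GetQbits({'total': 8, 'integer': 4}, keep_negative=1)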
def test_qnetwork():
  x = x_in = Input((28, 28, 1), name='input')
  x = QSeparableConv2D(32, (2, 2), strides=(2, 2),
                       depthwise_quantizer=binary(),
                       pointwise_quantizer=quantized_bits(4, 0, 1),
                       depthwise_activation=quantized_bits(6, 2, 1),
                       bias_quantizer=quantized_bits(4, 0, 1),
                       name='conv2d_0_m')(x)
  x = QActivation('quantized_relu(6,2,1)', name='act0_m')(x)
  x = QConv2D(64, (3, 3), strides=(2, 2),
              kernel_quantizer=ternary(),
              bias_quantizer=quantized_bits(4, 0, 1),
              name='conv2d_1_m')(x)
  x = QActivation('quantized_relu(6, 3, 1)', name='act1_m')(x)
  x = QConv2D(64, (2, 2), strides=(2, 2),
              kernel_quantizer=quantized_bits(6, 2, 1),
              bias_quantizer=quantized_bits(4, 0, 1),
              name='conv2d_2_m')(x)
  x = QActivation('quantized_relu(6,4,1)', name='act2_m')(x)
  x = Flatten(name='flatten')(x)
  x = QDense(10,
             kernel_quantizer=quantized_bits(6, 2, 1),
             bias_quantizer=quantized_bits(4, 0, 1),
             name='dense')(x)
  x = Activation('softmax', name='softmax')(x)
  model = Model(inputs=[x_in], outputs=[x])

  # generate same output for weights
  np.random.seed(42)
  for layer in model.layers:
    all_weights = []
    for i, weights in enumerate(layer.get_weights()):
      input_size = np.prod(layer.input.shape.as_list()[1:])
      if input_size is None:
        input_size = 576 * 10  # hack to avoid learning sizes
      shape = weights.shape
      assert input_size > 0, 'input size for {} {}'.format(layer.name, i)
      # he normal initialization with a scale factor of 2.0
      all_weights.append(
          10.0 * np.random.normal(0.0, np.sqrt(2.0 / input_size), shape))
    if all_weights:
      layer.set_weights(all_weights)

  # apply quantizer to weights
  model_save_quantized_weights(model)

  all_weights = []
  for layer in model.layers:
    for i, weights in enumerate(layer.get_weights()):
      w = np.sum(weights)
      all_weights.append(w)
  all_weights = np.array(all_weights)

  # test_qnetwork_weight_quantization
  all_weights_signature = np.array(
      [2.0, -6.75, -0.625, -2.0, -0.25, -56.0, 1.125, -2.625, -0.75])
  assert all_weights.size == all_weights_signature.size
  assert np.all(all_weights == all_weights_signature)

  # test_qnetwork_forward:
  y = np.array(
      [[0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 5.341e-02,
        9.468e-01, 0.000e+00, 0.000e+00, 0.000e+00],
       [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 5.960e-08, 0.000e+00,
        1.919e-01, 0.000e+00, 0.000e+00, 8.081e-01],
       [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 2.378e-04, 0.000e+00,
        0.000e+00, 0.000e+00, 2.843e-05, 9.995e-01],
       [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
        1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00],
       [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
        1.000e+00, 0.000e+00, 2.623e-06, 0.000e+00],
       [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 7.749e-07,
        0.000e+00, 0.000e+00, 1.634e-04, 1.000e+00],
       [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
        1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00],
       [0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
        6.557e-07, 0.000e+00, 0.000e+00, 0.000e+00],
       [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00,
        5.960e-08, 0.000e+00, 0.000e+00, 0.000e+00],
       [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 9.125e-03, 9.907e-01,
        9.418e-06, 0.000e+00, 5.597e-05, 0.000e+00]]).astype(np.float16)
  inputs = 2 * np.random.rand(10, 28, 28, 1)
  p = model.predict(inputs).astype(np.float16)
  assert np.all(p == y)
def test_qconv1d():
  np.random.seed(33)
  x = Input((4, 4,))
  y = QConv1D(2, 1,
              kernel_quantizer=quantized_bits(6, 2, 1),
              bias_quantizer=quantized_bits(4, 0, 1),
              name='qconv1d')(x)
  model = Model(inputs=x, outputs=y)

  # Extract model operations
  model_ops = extract_model_operations(model)

  # Assertion about the number of operations for this Conv1D layer
  assert model_ops['qconv1d']["number_of_operations"] == 32

  # Print qstats to make sure it works with Conv1D layer
  print_qstats(model)

  # reload the model to ensure saving/loading works
  json_string = model.to_json()
  clear_session()
  model = quantized_model_from_json(json_string)

  for layer in model.layers:
    all_weights = []
    for i, weights in enumerate(layer.get_weights()):
      input_size = np.prod(layer.input.shape.as_list()[1:])
      if input_size is None:
        input_size = 10 * 10
      shape = weights.shape
      assert input_size > 0, 'input size for {} {}'.format(layer.name, i)
      all_weights.append(
          10.0 * np.random.normal(0.0, np.sqrt(2.0 / input_size), shape))
    if all_weights:
      layer.set_weights(all_weights)

  # Save the model as an h5 file using Keras's model.save()
  fd, fname = tempfile.mkstemp('.h5')
  model.save(fname)
  del model  # Delete the existing model

  # Returns a compiled model identical to the previous one
  model = load_qmodel(fname)

  # Clean the created h5 file after loading the model
  os.close(fd)
  os.remove(fname)

  # apply quantizer to weights
  model_save_quantized_weights(model)

  inputs = np.random.rand(2, 4, 4)
  p = model.predict(inputs).astype(np.float16)
  '''
  y = np.array([[[0.1309, -1.229], [-0.4165, -2.639], [-0.08105, -2.299],
                 [1.981, -2.195]],
                [[-0.3174, -3.94], [-0.3352, -2.316], [0.105, -0.833],
                 [0.2115, -2.89]]]).astype(np.float16)
  '''
  y = np.array([[[-2.441, 3.816], [-3.807, -1.426], [-2.684, -1.317],
                 [-1.659, 0.9834]],
                [[-4.99, 1.139], [-2.559, -1.216], [-2.285, 1.905],
                 [-2.652, -0.467]]]).astype(np.float16)
  assert np.all(p == y)
def test_qpooling_in_qtools():
  input_size = (16, 16, 3)
  pool_size = (2, 2)
  input_quantizers = [quantized_bits(8, 0, 1)]
  is_inference = False

  x = Input(input_size)
  xin = x
  x = QAveragePooling2D(pool_size=pool_size,
                        average_quantizer=binary(),
                        activation=quantized_bits(4, 0, 1),
                        name="pooling")(x)
  x = QGlobalAveragePooling2D(average_quantizer=quantized_bits(4, 0, 1),
                              activation=ternary(),
                              name="global_pooling")(x)
  model = Model(inputs=xin, outputs=x)

  (graph, source_quantizer_list) = qgraph.CreateGraph(model, input_quantizers)
  qgraph.GraphPropagateActivationsToEdges(graph)
  layer_map = generate_layer_data_type_map.generate_layer_data_type_map(
      graph, source_quantizer_list, is_inference)
  dtype_dict = interface.map_to_json(layer_map)

  # Checks the QAveragePooling layer datatype
  multiplier = dtype_dict["pooling"]["pool_avg_multiplier"]
  accumulator = dtype_dict["pooling"]["pool_sum_accumulator"]
  average_quantizer = dtype_dict["pooling"]["average_quantizer"]
  output = dtype_dict["pooling"]["output_quantizer"]

  assert_equal(multiplier["quantizer_type"], "quantized_bits")
  assert_equal(multiplier["bits"], 10)
  assert_equal(multiplier["int_bits"], 3)
  assert_equal(multiplier["is_signed"], 1)
  assert_equal(multiplier["op_type"], "mux")
  assert_equal(accumulator["quantizer_type"], "quantized_bits")
  assert_equal(accumulator["bits"], 10)
  assert_equal(accumulator["int_bits"], 3)
  assert_equal(accumulator["is_signed"], 1)
  assert_equal(accumulator["op_type"], "add")
  assert_equal(output["quantizer_type"], "quantized_bits")
  assert_equal(output["bits"], 4)
  assert_equal(output["int_bits"], 1)
  assert_equal(output["is_signed"], 1)
  assert_equal(average_quantizer["quantizer_type"], "binary")
  assert_equal(average_quantizer["bits"], 1)
  assert_equal(average_quantizer["int_bits"], 1)
  assert_equal(average_quantizer["is_signed"], 1)

  # Checks the QGlobalAveragePooling layer datatype
  multiplier = dtype_dict["global_pooling"]["pool_avg_multiplier"]
  accumulator = dtype_dict["global_pooling"]["pool_sum_accumulator"]
  average_quantizer = dtype_dict["global_pooling"]["average_quantizer"]
  output = dtype_dict["global_pooling"]["output_quantizer"]

  assert_equal(multiplier["quantizer_type"], "quantized_bits")
  assert_equal(multiplier["bits"], 13)
  assert_equal(multiplier["int_bits"], 7)
  assert_equal(multiplier["is_signed"], 1)
  assert_equal(multiplier["op_type"], "mul")
  assert_equal(accumulator["quantizer_type"], "quantized_bits")
  assert_equal(accumulator["bits"], 10)
  assert_equal(accumulator["int_bits"], 7)
  assert_equal(accumulator["is_signed"], 1)
  assert_equal(accumulator["op_type"], "add")
  assert_equal(output["quantizer_type"], "ternary")
  assert_equal(output["bits"], 2)
  assert_equal(output["int_bits"], 2)
  assert_equal(output["is_signed"], 1)
  assert_equal(average_quantizer["quantizer_type"], "quantized_bits")
  assert_equal(average_quantizer["bits"], 4)
  assert_equal(average_quantizer["int_bits"], 1)
  assert_equal(average_quantizer["is_signed"], 1)
from qkeras import ternary
from qkeras.utils import model_save_quantized_weights
from qkeras.utils import quantized_model_from_json
from qkeras.utils import load_qmodel
from qkeras.utils import model_quantize
from qkeras import print_qstats
from qkeras.qtools import qgraph
from qkeras.qtools import generate_layer_data_type_map
from qkeras.qtools import interface


@pytest.mark.parametrize(
    ('pooling, input_size, pool_size, strides, padding, data_format,'
     'average_quantizer, activation_quantizer, y'),
    [
        ('QAveragePooling2D', (4, 4, 3), (2, 2), (2, 2), 'valid',
         'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),
         np.array([[[[0.375, 0.625, 0.375], [0.25, 0.75, 0.5]],
                    [[0.375, 0.25, 0.625], [0.625, 0.5, 0.375]]],
                   [[[0.375, 0.375, 0.5], [0.375, 0.5, 0.625]],
                    [[0.75, 0.625, 0.5], [0.5, 0.5, 0.75]]]]).astype(
                        np.float16)),
        ('QAveragePooling2D', (4, 4, 3), (3, 3), (3, 3), 'valid',
         'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),
         np.array([[[[0.375, 0.625, 0.625]]],
                   [[[0.625, 0.5, 0.625]]]]).astype(np.float16)),
        ('QGlobalAveragePooling2D', (4, 4, 3), (2, 2), (2, 2), 'valid',
         'channels_last', quantized_bits(10, 0, 1), quantized_bits(4, 0, 1),
         np.array([[0.5, 0.5, 0.375],
                   [0.5, 0.5, 0.625]]).astype(np.float16)),
        ('QAveragePooling2D', (4, 4, 3), (2, 2), (3, 3), 'valid',
         'channels_last', quantized_bits(4, 0, 1), quantized_bits(4, 0, 1),
         np.array([[[[0.375, 0.625, 0.375]]], [[[0.375, 0.375, 0.5]]]
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from numpy.testing import assert_allclose
import pytest

from qkeras import quantized_bits
from qkeras.codebook import weight_compression


@pytest.mark.parametrize(
    'bits, axis, quantizer, weights, expected_result',
    [(3, 3, quantized_bits(4, 0, 1, alpha='auto_po2'),
      np.array([[[0.14170583, -0.34360626, 0.29548156],
                 [0.6517242, 0.06870092, -0.21646781],
                 [0.12486842, -0.05406165, -0.23690471]],
                [[-0.07540564, 0.2123149, 0.2382695],
                 [0.78434753, 0.36171672, -0.43612534],
                 [0.3685556, 0.41328752, -0.48990643]],
                [[-0.04438099, 0.0590747, -0.0644061],
                 [0.15280165, 0.40714318, -0.04622072],
                 [0.21560416, -0.22131851, -0.5365659]]],
               dtype=np.float32),
      np.array([[[0.125, -0.3125, 0.25],
                 [0.4375, 0.125, -0.25],
                 [0.125, -0.0625, -0.25]],
                [[-0.0625, 0.25, 0.25],
                 [0.4375, 0.375, -0.4375],
                 [0.375, 0.4375, -0.4375]],
                [[-0.0625, 0.125, -0.0625],
                 [0.125, 0.4375, -0.0625],
def test_quantized_bits_range(bits, integer, expected_values):
  """Test quantized_bits range function."""
  q = quantized_bits(bits, integer)
  result = q.range()
  assert_allclose(result, expected_values, rtol=1e-05)
def __call__(self, inputs):
  """Residual unit."""
  x, y = inputs
  # `.value` was required on these shape dimensions in TF 1.x; in TF 2.x
  # they are plain ints.
  n_samples_in = y.shape[1]
  downsample = n_samples_in // self.n_samples_out
  n_filters_in = y.shape[2]
  y = self._skip_connection(y, downsample, n_filters_in)

  # 1st layer
  x = QConv1D(self.n_filters_out, self.kernel_size,
              padding='same',
              use_bias=False,
              kernel_initializer=self.kernel_initializer,
              kernel_quantizer=quantized_bits(bits=10, integer=2, symmetric=0,
                                              keep_negative=1),
              bias_quantizer=quantized_bits(bits=10, integer=2, symmetric=0,
                                            keep_negative=1))(x)
  x = QActivation(
      "quantized_bits(bits=13, integer=2, symmetric=0, keep_negative=1)")(x)
  x = self._batch_norm_plus_activation(x)
  if self.dropout_rate > 0:
    x = Dropout(self.dropout_rate)(x)

  # 2nd layer
  x = QConv1D(self.n_filters_out, self.kernel_size,
              strides=downsample,
              padding='same',
              use_bias=False,
              kernel_initializer=self.kernel_initializer,
              kernel_quantizer=quantized_bits(bits=10, integer=2, symmetric=0,
                                              keep_negative=1),
              bias_quantizer=quantized_bits(bits=10, integer=2, symmetric=0,
                                            keep_negative=1))(x)
  if self.preactivation:
    x = Add()([x, y])  # Sum skip connection and main connection
    y = x
    x = self._batch_norm_plus_activation(x)
    if self.dropout_rate > 0:
      x = Dropout(self.dropout_rate)(x)
  else:
    x = BatchNormalization()(x)
    x = Add()([x, y])  # Sum skip connection and main connection
    x = Activation(self.activation_function)(x)
    x = QActivation(
        "quantized_bits(bits=13, integer=2, symmetric=0, keep_negative=1)")(x)
    if self.dropout_rate > 0:
      x = Dropout(self.dropout_rate)(x)
    y = x
  return [x, y]