def QDenseModel(weights_f, load_weights=False):
    """Construct QDenseModel."""
    x = x_in = Input((RESHAPED,), name="input")
    x = QActivation("quantized_relu(4)", name="act_i")(x)
    x = QDense(N_HIDDEN, kernel_quantizer=ternary(),
               bias_quantizer=quantized_bits(4, 0, 1),
               name="dense0")(x)
    x = QActivation("quantized_relu(2)", name="act0")(x)
    x = QDense(NB_CLASSES, kernel_quantizer=quantized_bits(4, 0, 1),
               bias_quantizer=quantized_bits(4, 0, 1),
               name="dense2")(x)
    x = Activation("softmax", name="softmax")(x)

    model = Model(inputs=[x_in], outputs=[x])
    model.summary()
    model.compile(loss="categorical_crossentropy",
                  optimizer=OPTIMIZER,
                  metrics=["accuracy"])

    if load_weights and weights_f:
        model.load_weights(weights_f)

    print_qstats(model)
    return model
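# Usage sketch (not from the original source): QDenseModel above reads
# RESHAPED, N_HIDDEN, NB_CLASSES, and OPTIMIZER from module scope, so they
# must exist before the call. The MNIST-style values below are assumptions
# chosen only to make the sketch self-contained.
RESHAPED = 28 * 28
N_HIDDEN = 100
NB_CLASSES = 10
OPTIMIZER = "adam"

model = QDenseModel(weights_f=None)  # build and compile without loading weights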
def maximum_qmodel(quantizer1, quantizer2, quantizer3):
    # Element-wise maximum of a list of inputs: keras.layers.maximum takes a
    # list of tensors, all of the same shape, and returns a single tensor
    # (also of the same shape).
    x1 = input1 = keras.layers.Input((16,), name="input_0")
    x1 = QDense(8,
                kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
                bias_quantizer=quantizers.quantized_bits(5, 0, 1),
                activation=quantizer1,
                name="qdense_0")(x1)

    x2 = input2 = keras.layers.Input(shape=(32,), name="input_1")
    x2 = QDense(8,
                kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
                bias_quantizer=quantizers.quantized_bits(5, 0, 1),
                activation=quantizer2,
                name="dense_1")(x2)

    x3 = input3 = keras.layers.Input(shape=(64, ), name="input_2")
    x3 = QDense(8,
                kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
                bias_quantizer=quantizers.quantized_bits(5, 0, 1),
                activation=quantizer3,
                name="dense_2")(x3)

    x = keras.layers.maximum([x1, x2, x3], name="maximum")

    model = keras.Model(inputs=[input1, input2, input3], outputs=[x])
    return model
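# Usage sketch (not from the original source): maximum_qmodel takes one
# activation quantizer per branch; quantized_relu(4, 0) is an illustrative
# choice, assuming `quantizers` is qkeras.quantizers as in the function above.
model = maximum_qmodel(quantizers.quantized_relu(4, 0),
                       quantizers.quantized_relu(4, 0),
                       quantizers.quantized_relu(4, 0))
model.summary()  # each branch projects to shape (None, 8) before the merge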
def multiply_qmodel():
    # Element-wise multiply of a list of inputs: keras.layers.multiply takes a
    # list of tensors, all of the same shape, and returns a single tensor
    # (also of the same shape).
    x1 = input1 = keras.layers.Input((16,), name="input_0")
    x1 = QDense(8,
                kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
                bias_quantizer=quantizers.quantized_bits(5, 0, 1),
                activation=quantizers.quantized_bits(4, 0, 1),
                name="dense_0")(x1)

    x2 = input2 = keras.layers.Input(shape=(32,), name="input_1")
    x2 = QDense(8,
                kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
                bias_quantizer=quantizers.quantized_bits(5, 0, 1),
                activation=quantizers.quantized_bits(5, 0, 1),
                name="dense_1")(x2)

    x3 = input3 = keras.layers.Input(shape=(64,), name="input_2")
    x3 = QDense(8,
                kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
                bias_quantizer=quantizers.quantized_bits(5, 0, 1),
                activation=quantizers.quantized_bits(6, 0, 1),
                name="dense_2")(x3)

    x = keras.layers.multiply([x1, x2, x3], name="multiply")

    model = keras.Model(inputs=[input1, input2, input3], outputs=[x])
    return model
def QDenseModel(weights_f, load_weights=False):
    """Construct QDenseModel."""
    x = x_in = Input((28 * 28,), name="input")
    x = QActivation("quantized_relu(2)", name="act_i")(x)
    x = Dense(100, name="d0")(x)
    x = BatchNormalization(name="bn0")(x)
    x = QActivation("quantized_relu(2)", name="act0_m")(x)
    x = Flatten(name="flatten")(x)
    x = QDense(NB_CLASSES, kernel_quantizer=quantized_bits(4, 0, 1),
               bias_quantizer=quantized_bits(4, 0, 1),
               name="dense2")(x)
    x = Activation("softmax", name="softmax")(x)

    model = Model(inputs=[x_in], outputs=[x])
    model.summary()
    model.compile(loss="categorical_crossentropy",
                  optimizer=OPTIMIZER,
                  metrics=["accuracy"])

    if load_weights and weights_f:
        model.load_weights(weights_f)

    return model
def build_model(input_shape):
    x = x_in = Input(shape=input_shape, name="input")
    x = QConv2D(32, (2, 2), strides=(2, 2),
                kernel_quantizer=quantized_bits(4, 0, 1),
                bias_quantizer=quantized_bits(4, 0, 1),
                name="conv2d_0_m")(x)
    x = QActivation("quantized_relu(4,0)", name="act0_m")(x)
    x = QConv2D(64, (3, 3), strides=(2, 2),
                kernel_quantizer=quantized_bits(4, 0, 1),
                bias_quantizer=quantized_bits(4, 0, 1),
                name="conv2d_1_m")(x)
    x = QActivation("quantized_relu(4,0)", name="act1_m")(x)
    x = QConv2D(64, (2, 2), strides=(2, 2),
                kernel_quantizer=quantized_bits(4, 0, 1),
                bias_quantizer=quantized_bits(4, 0, 1),
                name="conv2d_2_m")(x)
    x = QActivation("quantized_relu(4,0)", name="act2_m")(x)
    x = Flatten()(x)
    x = QDense(num_classes,
               kernel_quantizer=quantized_bits(4, 0, 1),
               bias_quantizer=quantized_bits(4, 0, 1),
               name="dense")(x)
    x = Activation("softmax", name="softmax")(x)

    model = Model(inputs=[x_in], outputs=[x])
    return model
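# Usage sketch (not from the original source): build_model reads num_classes
# from module scope. The MNIST-style input shape and class count below are
# illustrative assumptions.
num_classes = 10
model = build_model((28, 28, 1))
model.compile(loss="categorical_crossentropy", optimizer="adam",
              metrics=["accuracy"])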
def test_qbidirectional(rnn, all_weights_signature, expected_output):
    K.set_learning_phase(0)
    np.random.seed(22)
    tf.random.set_seed(22)

    x = x_in = Input((2, 4), name='input')
    x = QBidirectional(
        rnn(16,
            activation="quantized_po2(8)",
            kernel_quantizer="quantized_po2(8)",
            recurrent_quantizer="quantized_po2(8)",
            bias_quantizer="quantized_po2(8)",
            name='qbirnn_0'))(x)
    x = QDense(4,
               kernel_quantizer=quantized_bits(8, 2, 1, alpha=1.0),
               bias_quantizer=quantized_bits(8, 0, 1),
               name='dense')(x)
    x = Activation('softmax', name='softmax')(x)
    model = Model(inputs=[x_in], outputs=[x])

    # Reload the model to ensure saving/loading works.
    json_string = model.to_json()
    clear_session()
    model = quantized_model_from_json(json_string)

    # Save the model as an h5 file using Keras's model.save().
    fd, fname = tempfile.mkstemp('.h5')
    model.save(fname)
    del model  # Delete the existing model.

    # Return a compiled model identical to the previous one.
    model = load_qmodel(fname)

    # Clean up the created h5 file after loading the model.
    os.close(fd)
    os.remove(fname)

    # Apply the quantizers to the weights.
    model_save_quantized_weights(model)

    all_weights = []
    for layer in model.layers:
        for i, weights in enumerate(layer.get_weights()):
            w = np.sum(weights)
            all_weights.append(w)
    all_weights = np.array(all_weights)

    assert all_weights.size == all_weights_signature.size
    assert np.all(all_weights == all_weights_signature)

    # Test the forward pass.
    inputs = 2 * np.random.rand(10, 2, 4)
    actual_output = model.predict(inputs).astype(np.float16)
    assert_allclose(actual_output, expected_output, rtol=1e-4)
def hybrid_model():
    """Hybrid model that mixes QKeras and Keras layers."""
    x = x_in = keras.layers.Input((784,), name="input")
    x = keras.layers.Dense(300, name="d0")(x)
    x = keras.layers.Activation("relu", name="d0_act")(x)
    x = QDense(100,
               kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
               bias_quantizer=quantizers.quantized_bits(4, 0, 1),
               name="d1")(x)
    x = QActivation("quantized_relu(4,0)", name="d1_qr4")(x)
    x = QDense(10,
               kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
               bias_quantizer=quantizers.quantized_bits(4, 0, 1),
               name="d2")(x)
    x = keras.layers.Activation("softmax", name="softmax")(x)
    return keras.Model(inputs=[x_in], outputs=[x])
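# Usage sketch (not from the original source): in the hybrid model only the
# QDense/QActivation layers carry quantizers; the per-layer operation report
# from qkeras's print_qstats makes the float Dense layer easy to spot.
# Assumes `from qkeras import print_qstats`.
model = hybrid_model()
print_qstats(model)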
def build_baseline(image_size=16, nclasses=5, filters=[8, 8, 16]):
    # Note: image_size and filters are currently unused; the network expects
    # 16 input features.
    inputs = tf.keras.Input((16,), name="Input")  # shape must be a tuple: (16,), not (16)
    x = QDense(64,
               kernel_quantizer=quantized_bits(4, 0, 1),
               bias_quantizer=quantized_bits(4, 0, 1),
               name="qdense_1")(inputs)
    x = QActivation('quantized_relu(4,2)', name="qact_1")(x)
    x = QDense(32,
               kernel_quantizer='ternary',
               bias_quantizer='ternary',
               name="qdense_2")(x)
    x = QActivation('quantized_relu(3,1)', name="qact_2")(x)
    x = QDense(32,
               kernel_quantizer=quantized_bits(2, 1, 1),
               bias_quantizer=quantized_bits(2, 1, 1),
               name="qdense_3")(x)
    x = QActivation('quantized_relu(4,2)', name="qact_3")(x)
    x = QDense(nclasses,  # was hardcoded to 5; the layer name suggests nclasses was intended
               kernel_quantizer='stochastic_binary',
               bias_quantizer=quantized_bits(8, 3, 1),
               name="qdense_nclasses")(x)
    predictions = tf.keras.layers.Activation('softmax', name="softmax")(x)
    model = tf.keras.Model(inputs, predictions, name='baseline')
    return model
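# Usage sketch (not from the original source): build_baseline expects 16 input
# features and defaults to 5 classes; the compile settings are assumptions.
model = build_baseline()
model.compile(loss="categorical_crossentropy", optimizer="adam",
              metrics=["accuracy"])
model.summary()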
def qkeras_cnn(name_, Inputs, nclasses, filters, kernel, strides, pooling,
               dropout, activation, pruning_params={},
               qb=quantized_bits(6, 0, alpha=1)):
    length = len(filters)
    if any(len(lst) != length
           for lst in [filters, kernel, strides, pooling, dropout]):
        sys.exit("One value for stride and kernel must be added for each "
                 "filter! Exiting")

    x = x_in = Inputs
    x = BatchNormalization()(x)
    x = ZeroPadding2D(padding=(1, 1), data_format="channels_last")(x)
    # Note: the per-layer dropout value d is validated above but not applied
    # inside the conv blocks.
    for i, (f, k, s, p, d) in enumerate(
            zip(filters, kernel, strides, pooling, dropout)):
        print("Adding layer with {} filters, kernel_size=({},{}), "
              "strides=({},{})".format(f, k, k, s, s))
        x = QConv2D(int(f),
                    kernel_size=(int(k), int(k)),
                    strides=(int(s), int(s)),
                    kernel_quantizer=qb,
                    bias_quantizer=qb,
                    kernel_initializer='lecun_uniform',
                    kernel_regularizer=l1(0.0001),
                    use_bias=False,
                    name='conv_%i' % i)(x)
        if float(p) != 0:
            x = MaxPooling2D(pool_size=(int(p), int(p)))(x)
        x = BatchNormalization()(x)
        x = Activation(activation, name='conv_act_%i' % i)(x)
    x = Flatten()(x)
    x = QDense(128,
               kernel_quantizer=qb,
               bias_quantizer=qb,
               kernel_initializer='lecun_uniform',
               kernel_regularizer=l1(0.0001),
               name='dense_1',
               use_bias=False)(x)
    x = Dropout(0.25)(x)
    x = BatchNormalization()(x)
    x = Activation(activation, name='dense_act')(x)
    x_out = Dense(nclasses, activation='softmax', name='output')(x)
    model = Model(inputs=[x_in], outputs=[x_out], name=name_)
    return model
def test_populate_bias_quantizer_from_accumulator():
    """Test the populate_bias_quantizer_from_accumulator function.

    Define a qkeras model with a QConv2DBatchnorm layer whose bias quantizer
    is set to None. Call populate_bias_quantizer_from_accumulator to derive
    the bias quantizer type automatically from the MAC accumulator type and
    set it in the model. Then call populate_bias_quantizer_from_accumulator
    again on the same model; since the bias quantizer is already set, the
    function should not change it.
    """
    x_shape = (2, 2, 1)

    # Get a qkeras model with a QConv2DBatchnorm layer. Set the bias quantizer
    # in that layer to None.
    x = x_in = layers.Input(x_shape, name="input")
    x1 = QConv2D(filters=1, kernel_size=(1, 1), strides=(1, 1),
                 use_bias=False,
                 kernel_quantizer="quantized_bits(4, 0, 1)",
                 name="conv2d_1")(x)
    x2 = QConv2D(filters=1, kernel_size=(1, 1), strides=(1, 1),
                 use_bias=False,
                 kernel_quantizer="quantized_bits(4, 0, 1)",
                 name="conv2d_2")(x)
    x = layers.Maximum()([x1, x2])
    x = QActivation("quantized_relu(4, 1)")(x)
    x = QConv2DBatchnorm(filters=2, kernel_size=(2, 2), strides=(4, 4),
                         kernel_initializer="ones",
                         bias_initializer="zeros",
                         use_bias=False,
                         kernel_quantizer="quantized_bits(4, 0, 1)",
                         bias_quantizer=None,
                         beta_initializer="zeros",
                         gamma_initializer="ones",
                         moving_mean_initializer="zeros",
                         moving_variance_initializer="ones",
                         folding_mode="batch_stats_folding",
                         ema_freeze_delay=10,
                         name="foldconv2d")(x)
    x1 = x
    x2 = layers.Flatten(name="flatten")(x)
    x2 = QDense(2, use_bias=False,
                kernel_initializer="ones",
                kernel_quantizer="quantized_bits(6, 2, 1)",
                name="dense")(x2)
    model = Model(inputs=[x_in], outputs=[x1, x2])
    assert_equal(model.layers[5].get_quantizers()[1], None)

    # Call populate_bias_quantizer_from_accumulator to automatically generate
    # the bias quantizer from the MAC accumulator type.
    _ = bn_folding_utils.populate_bias_quantizer_from_accumulator(
        model, ["quantized_bits(8, 0, 1)"])
    q = model.layers[5].get_quantizers()[1]
    assert_equal(q.__str__(), "quantized_bits(10,3,1)")

    # Call populate_bias_quantizer_from_accumulator again; the bias quantizer
    # should not change.
    _ = bn_folding_utils.populate_bias_quantizer_from_accumulator(
        model, ["quantized_bits(8, 0, 1)"])
    q = model.layers[5].get_quantizers()[1]
    assert_equal(q.__str__(), "quantized_bits(10,3,1)")
def test_dense():
    data = getData()
    nBits = 8
    nBitsInt = 4
    qbits_param_input = qkr.quantized_bits(bits=nBits, integer=nBitsInt,
                                           keep_negative=0)
    qbits_param = qkr.quantized_bits(bits=nBits, integer=nBitsInt,
                                     keep_negative=1)

    # Simple model that only quantizes the inputs and one dense layer.
    inputs = Input(shape=(4, 4, 3))
    x = inputs
    x = Flatten(name="flatten")(x)
    x = QActivation(qbits_param_input, name='q_decoder_output')(x)
    encodedLayer = QDense(10, activation='relu', name='encoded_vector',
                          kernel_quantizer=qbits_param,
                          bias_quantizer=qbits_param)(x)
    model = Model(inputs, encodedLayer, name='encoder')
    model.summary()
    model.compile(loss='mse', optimizer='adam')

    val_input, train_input = split(data, 0.5)
    train_output = np.ones(50).reshape(5, 10)  # garbage outputs for training
    val_output = np.ones(50).reshape(5, 10)  # garbage outputs for validation

    es = kr.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1,
                                    patience=3)
    history = model.fit(train_input, train_output, epochs=1, batch_size=500,
                        shuffle=True,
                        validation_data=(val_input, val_output),
                        callbacks=[es])

    val_output = model.predict(val_input)
    print('\nTEST DENSE')
    print('\nRaw validation output: \n', val_output)
    print('\nMultiplied by 2^(decimal bits): \n'
          ' Results should be integers * weight precision... \n',
          val_output * (2**(nBits - nBitsInt)))
def test_wrong_input_quantizers():
    input_quantizers = [
        quantizers.quantized_bits(4, 0, 1),
        quantizers.quantized_bits(5, 0, 1),
        quantizers.quantized_bits(6, 0, 1)
    ]
    # INPUT_QUANTIZERS = None
    x1 = input1 = keras.layers.Input((16,), name="input_0")
    x1 = QDense(8,
                kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
                bias_quantizer=quantizers.quantized_bits(5, 0, 1),
                name="dense_0")(x1)
    x2 = input2 = keras.layers.Input(shape=(32,), name="input_1")
    x2 = QDense(8,
                kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
                bias_quantizer=quantizers.quantized_bits(5, 0, 1),
                name="dense_1")(x2)
    x = keras.layers.add([x1, x2], name="add")
    model = keras.Model(inputs=[input1, input2], outputs=[x])

    # Three input quantizers for a two-input model should be rejected.
    with pytest.raises(qgraph.WrongInputQuantizerError):
        run(model, input_quantizers)
def qdense_model_fork():
    x = x_in = keras.layers.Input((23,), name="input")
    x = QDense(10,
               kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
               bias_quantizer=quantizers.quantized_bits(5, 0, 1),
               activation=quantizers.quantized_po2(3, 1),
               name="qdense_0")(x)
    x = QDense(20,
               kernel_quantizer=quantizers.quantized_bits(5, 0, 1),
               bias_quantizer=quantizers.quantized_bits(5, 0, 1),
               activation=quantizers.quantized_relu(6, 2),
               name="qdense_1")(x)
    x = QActivation("quantized_relu(4)", name="QA_2")(x)
    x_1 = QDense(30,
                 kernel_quantizer=quantizers.binary(),
                 bias_quantizer=quantizers.binary(),
                 name="qdense_3")(x)
    x_2 = QActivation("quantized_relu(6,2)", name="QA_3")(x)
    model = keras.Model(inputs=[x_in], outputs=[x_1, x_2])
    return model
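# Usage sketch (not from the original source): the fork feeds the output of
# "QA_2" to both heads, so a single 23-feature input yields two outputs.
# Assumes numpy is imported as np.
model = qdense_model_fork()
out_dense, out_act = model(np.zeros((1, 23), dtype="float32"))
print(out_dense.shape, out_act.shape)  # (1, 30) and (1, 20)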
def quantized_cnn(Inputs, nclasses, filters, kernel, strides, pooling, dropout,
                  activation="quantized_relu(32,16)",
                  quantizer_cnn=quantized_bits(1),
                  quantizer_dense=quantized_bits(1)):
    length = len(filters)
    if any(len(lst) != length
           for lst in [filters, kernel, strides, pooling, dropout]):
        sys.exit("One value for stride and kernel must be added for each "
                 "filter! Exiting")

    x = x_in = Inputs
    for i, (f, k, s, p, d) in enumerate(
            zip(filters, kernel, strides, pooling, dropout)):
        print("Adding layer with {} filters, kernel_size=({},{}), "
              "strides=({},{})".format(f, k, k, s, s))
        x = QConv2D(int(f),
                    kernel_size=(int(k), int(k)),
                    strides=(int(s), int(s)),
                    kernel_quantizer=quantizer_cnn,
                    bias_quantizer=quantizer_cnn,
                    name='conv_%i' % i)(x)
        x = QActivation(activation)(x)
        x = BatchNormalization()(x)
        if float(p) != 0:
            x = MaxPooling2D(pool_size=(int(p), int(p)))(x)
        # x = Dropout(float(d))(x)
    x = Flatten()(x)
    x = QDense(128,
               kernel_quantizer=quantizer_dense,
               bias_quantizer=quantizer_dense)(x)
    x = QActivation(activation)(x)
    x = BatchNormalization()(x)
    x = Dense(nclasses)(x)
    model = Model(inputs=[x_in], outputs=[x])
    return model
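# Usage sketch (not from the original source): one list entry per conv block;
# mismatched list lengths trigger the sys.exit guard above. All values below
# are illustrative assumptions, using the same Input/quantized_bits imports as
# the function itself.
model = quantized_cnn(Input((32, 32, 3)), nclasses=10,
                      filters=[16, 32], kernel=[3, 3], strides=[1, 1],
                      pooling=[2, 0], dropout=[0.0, 0.0],
                      quantizer_cnn=quantized_bits(4, 0, 1),
                      quantizer_dense=quantized_bits(4, 0, 1))
model.summary()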
def gen_model(img_shape):
    img_input = x = keras.Input(shape=img_shape)
    x = QConv2D(filters=5, kernel_size=4, strides=4,
                kernel_quantizer=quantizers.quantized_bits(8, 3,
                                                           alpha="auto_po2"),
                bias_quantizer=quantizers.quantized_bits(8, 3),
                name="conv")(x)
    x = QActivation(activation=quantizers.quantized_relu(4, 0), name="act")(x)
    x = keras.layers.Flatten(name="flatten")(x)
    x = QDense(5,
               kernel_quantizer=quantizers.quantized_bits(8, 0,
                                                          alpha="auto_po2"),
               bias_quantizer=quantizers.quantized_bits(8, 3),
               name="dense")(x)
    model = keras.Model(inputs=img_input, outputs=[x])
    return model
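# Usage sketch (not from the original source): with an assumed 16x16x1 input,
# the stride-4 conv yields a 4x4x5 feature map, which is flattened into the
# 5-unit QDense head.
model = gen_model((16, 16, 1))
model.summary()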
def test_sequential_qnetwork():
    model = tf.keras.Sequential()
    model.add(Input((28, 28, 1), name='input'))
    model.add(
        QConv2D(32, (2, 2), strides=(2, 2),
                kernel_quantizer=quantized_bits(4, 0, 1),
                bias_quantizer=quantized_bits(4, 0, 1),
                name='conv2d_0_m'))
    model.add(QActivation(quantized_relu(4, 0), name='act0_m'))
    model.add(
        QConv2D(64, (3, 3), strides=(2, 2),
                kernel_quantizer=quantized_bits(4, 0, 1),
                bias_quantizer=quantized_bits(4, 0, 1),
                name='conv2d_1_m'))
    model.add(QActivation(quantized_relu(4, 0), name='act1_m'))
    model.add(
        QConv2D(64, (2, 2), strides=(2, 2),
                kernel_quantizer=quantized_bits(4, 0, 1),
                bias_quantizer=quantized_bits(4, 0, 1),
                name='conv2d_2_m'))
    model.add(QActivation(quantized_relu(4, 0), name='act2_m'))
    model.add(Flatten())
    model.add(
        QDense(10,
               kernel_quantizer=quantized_bits(4, 0, 1),
               bias_quantizer=quantized_bits(4, 0, 1),
               name='dense'))
    model.add(Activation('softmax', name='softmax'))

    # Check that all model operations were found correctly.
    model_ops = extract_model_operations(model)
    for layer in model_ops.keys():
        assert model_ops[layer]['type'][0] != 'null'
    return model
def test_util_layers():
    input_quantizers = None  # quantizers.quantized_bits(4, 0, 1)
    act = "quantized_bits(6, 0, 1)"

    x = x_in = keras.layers.Input((24, 24, 1), name="input")
    x = QActivation(act, name="QA_0")(x)
    x = keras.layers.Reshape((12 * 12, 4, 1), name="reshape_1")(x)
    x = keras.layers.MaxPooling2D(pool_size=(2, 2), name="maxpooling_2")(x)
    x = keras.layers.Flatten(name="flatten_3")(x)
    x = QDense(30,
               kernel_quantizer=quantizers.binary(use_01=1),
               bias_quantizer=quantizers.binary(use_01=1),
               activation=quantizers.quantized_po2(3, 2),
               name="qdense_4")(x)
    model = keras.Model(inputs=[x_in], outputs=[x])

    dtype_dict = run(model, input_quantizers)

    multiplier = dtype_dict["qdense_4"]["multiplier"]
    assert multiplier["quantizer_type"] == "quantized_bits"
    assert multiplier["bits"] == 6
    assert multiplier["int_bits"] == 1
    assert multiplier["is_signed"] == 1
    assert multiplier["op_type"] == "and"

    accumulator = dtype_dict["qdense_4"]["accumulator"]
    assert accumulator["quantizer_type"] == "quantized_bits"
    assert accumulator["bits"] == 15
    assert accumulator["int_bits"] == 10
    assert accumulator["is_signed"] == 1
    assert accumulator["op_type"] == "add"

    output = dtype_dict["qdense_4"]["output_quantizer"]
    assert output["quantizer_type"] == "quantized_po2"
    assert output["bits"] == 3
    assert output["is_signed"] == 1
    assert output["max_value"] == 2
def build_layerwise_model(input_shape, **pruning_params):
    return Sequential([
        prune.prune_low_magnitude(
            QConv2D(32, (2, 2), strides=(2, 2),
                    kernel_quantizer=quantized_bits(4, 0, 1),
                    bias_quantizer=quantized_bits(4, 0, 1),
                    name="conv2d_0_m"),
            input_shape=input_shape,
            **pruning_params),
        QActivation("quantized_relu(4,0)", name="act0_m"),
        prune.prune_low_magnitude(
            QConv2D(64, (3, 3), strides=(2, 2),
                    kernel_quantizer=quantized_bits(4, 0, 1),
                    bias_quantizer=quantized_bits(4, 0, 1),
                    name="conv2d_1_m"),
            **pruning_params),
        QActivation("quantized_relu(4,0)", name="act1_m"),
        prune.prune_low_magnitude(
            QConv2D(64, (2, 2), strides=(2, 2),
                    kernel_quantizer=quantized_bits(4, 0, 1),
                    bias_quantizer=quantized_bits(4, 0, 1),
                    name="conv2d_2_m"),
            **pruning_params),
        QActivation("quantized_relu(4,0)", name="act2_m"),
        Flatten(),
        prune.prune_low_magnitude(
            QDense(num_classes,
                   kernel_quantizer=quantized_bits(4, 0, 1),
                   bias_quantizer=quantized_bits(4, 0, 1),
                   name="dense"),
            **pruning_params),
        Activation("softmax", name="softmax")
    ])
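# Usage sketch (not from the original source): build_layerwise_model forwards
# **pruning_params to prune.prune_low_magnitude, which presumably comes from
# tensorflow_model_optimization in the original script. The schedule and
# num_classes below are illustrative assumptions.
import tensorflow_model_optimization as tfmot

num_classes = 10
pruning_params = {
    "pruning_schedule": tfmot.sparsity.keras.PolynomialDecay(
        initial_sparsity=0.0, final_sparsity=0.75,
        begin_step=0, end_step=1000)
}
model = build_layerwise_model((28, 28, 1), **pruning_params)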
k_reg = None
a_reg = None
# constraint = keras.constraints.min_max_norm(0, 1)
num_nodes_h = 32

# In[20]:

# QDense model
"""When using concatenated dataset ---> add one more input = (5,)"""
inputs = Input(shape=(4,), name='inputs_0')
# i = QActivation("quantized_relu(8,8)", name="act_i")(inputs)

# hidden 1
hidden_layer = QDense(num_nodes_h,
                      kernel_quantizer=quantized_bits(bits, integer),
                      bias_quantizer=quantized_bits(bits, integer),
                      kernel_regularizer=k_reg,
                      activity_regularizer=a_reg,
                      name="dense_2")(inputs)
hidden_layer = QBatchNormalization(name='bn_2')(hidden_layer)
hidden_layer = QActivation("quantized_relu(16,8)", name="relu_2")(hidden_layer)

# hidden 2
hidden_layer = QDense(num_nodes_h,
                      kernel_quantizer=quantized_bits(bits, integer),
                      bias_quantizer=quantized_bits(bits, integer),
                      kernel_regularizer=k_reg,
                      activity_regularizer=a_reg,
                      name="dense_3")(hidden_layer)
hidden_layer = QBatchNormalization(name='bn_3')(hidden_layer)
hidden_layer = QActivation("quantized_relu(16,8)", name="relu_3")(hidden_layer)

# hidden 3
hidden_layer = QDense(num_nodes_h,
                    kernel_size=kernel_size,
                    kernel_initializer=kernel_initializer)([x, x])
x, y = ResidualUnit(128, 196,
                    kernel_size=kernel_size,
                    kernel_initializer=kernel_initializer)([x, y])
x, y = ResidualUnit(32, 256,
                    kernel_size=kernel_size,
                    kernel_initializer=kernel_initializer)([x, y])
x, _ = ResidualUnit(8, 320,
                    kernel_size=kernel_size,
                    kernel_initializer=kernel_initializer)([x, y])
x = Flatten()(x)
diagn = QDense(5, activation='sigmoid',
               kernel_initializer=kernel_initializer,
               kernel_quantizer=quantized_bits(bits=10, integer=2,
                                               symmetric=0, keep_negative=1),
               bias_quantizer=quantized_bits(bits=10, integer=2,
                                             symmetric=0, keep_negative=1))(x)
model = Model(signal, diagn)

# ----------------- #

if __name__ == "__main__":
    model.summary()
def init(self, printSummary=True):
    # keep_negative = 0 on inputs, otherwise for weights keep the default (=1)
    encoded_dim = self.pams['encoded_dim']

    CNN_layer_nodes = self.pams['CNN_layer_nodes']
    CNN_kernel_size = self.pams['CNN_kernel_size']
    CNN_pool = self.pams['CNN_pool']
    Dense_layer_nodes = self.pams['Dense_layer_nodes']  # does not include encoded layer
    channels_first = self.pams['channels_first']

    inputs = Input(shape=self.pams['shape'])  # adapt this if using `channels_first` image data format

    # load bits to quantize
    nBits_input = self.pams['nBits_input']
    nBits_accum = self.pams['nBits_accum']
    nBits_weight = self.pams['nBits_weight']
    nBits_encod = self.pams['nBits_encod']
    nBits_dense = self.pams['nBits_dense'] if 'nBits_dense' in self.pams else nBits_weight
    nBits_conv = self.pams['nBits_conv'] if 'nBits_conv' in self.pams else nBits_weight

    input_Qbits = self.GetQbits(nBits_input, keep_negative=1)  # oddly fails if keep_neg=0
    accum_Qbits = self.GetQbits(nBits_accum, keep_negative=1)
    dense_Qbits = self.GetQbits(nBits_dense, keep_negative=1)
    conv_Qbits = self.GetQbits(nBits_conv, keep_negative=1)
    encod_Qbits = self.GetQbits(nBits_encod, keep_negative=1)
    # keeping weights and bias at the same precision for now

    # define model
    x = inputs
    x = QActivation(input_Qbits, name='input_qa')(x)

    for i, n_nodes in enumerate(CNN_layer_nodes):
        if channels_first:
            x = QConv2D(n_nodes, CNN_kernel_size[i],
                        activation='relu', padding='same',
                        data_format='channels_first',
                        name="conv2d_" + str(i) + "_m",
                        kernel_quantizer=conv_Qbits,
                        bias_quantizer=conv_Qbits)(x)
        else:
            x = QConv2D(n_nodes, CNN_kernel_size[i],
                        activation='relu', padding='same',
                        name="conv2d_" + str(i) + "_m",
                        kernel_quantizer=conv_Qbits,
                        bias_quantizer=conv_Qbits)(x)
        if CNN_pool[i]:
            if channels_first:
                x = MaxPooling2D((2, 2), padding='same',
                                 data_format='channels_first',
                                 name="mp_" + str(i))(x)
            else:
                x = MaxPooling2D((2, 2), padding='same',
                                 name="mp_" + str(i))(x)

    shape = K.int_shape(x)

    x = QActivation(accum_Qbits, name='accum1_qa')(x)
    x = Flatten(name="flatten")(x)

    # encoder dense nodes
    for i, n_nodes in enumerate(Dense_layer_nodes):
        x = QDense(n_nodes, activation='relu',
                   name="en_dense_" + str(i),
                   kernel_quantizer=dense_Qbits,
                   bias_quantizer=dense_Qbits)(x)

    x = QDense(encoded_dim, activation='relu',
               name='encoded_vector',
               kernel_quantizer=dense_Qbits,
               bias_quantizer=dense_Qbits)(x)
    encodedLayer = QActivation(encod_Qbits, name='encod_qa')(x)

    # Instantiate Encoder Model
    self.encoder = Model(inputs, encodedLayer, name='encoder')
    if printSummary:
        self.encoder.summary()

    encoded_inputs = Input(shape=(encoded_dim,), name='decoder_input')
    x = encoded_inputs

    # decoder dense nodes
    for i, n_nodes in enumerate(Dense_layer_nodes):
        x = Dense(n_nodes, activation='relu', name="de_dense_" + str(i))(x)

    x = Dense(shape[1] * shape[2] * shape[3], activation='relu',
              name='de_dense_final')(x)
    x = Reshape((shape[1], shape[2], shape[3]), name="de_reshape")(x)

    for i, n_nodes in enumerate(CNN_layer_nodes):
        if CNN_pool[i]:
            if channels_first:
                x = UpSampling2D((2, 2), data_format='channels_first',
                                 name="up_" + str(i))(x)
            else:
                x = UpSampling2D((2, 2), name="up_" + str(i))(x)
        if channels_first:
            x = Conv2DTranspose(n_nodes, CNN_kernel_size[i],
                                activation='relu', padding='same',
                                data_format='channels_first',
                                name="conv2D_t_" + str(i))(x)
        else:
            x = Conv2DTranspose(n_nodes, CNN_kernel_size[i],
                                activation='relu', padding='same',
                                name="conv2D_t_" + str(i))(x)

    if channels_first:
        # shape[0] will be the number of channels
        x = Conv2DTranspose(filters=self.pams['shape'][0],
                            kernel_size=CNN_kernel_size[0],
                            padding='same',
                            data_format='channels_first',
                            name="conv2d_t_final")(x)
    else:
        x = Conv2DTranspose(filters=self.pams['shape'][2],
                            kernel_size=CNN_kernel_size[0],
                            padding='same',
                            name="conv2d_t_final")(x)

    x = QActivation(input_Qbits, name='q_decoder_output')(x)  # verify this step is needed?
    outputs = Activation('sigmoid', name='decoder_output')(x)

    self.decoder = Model(encoded_inputs, outputs, name='decoder')
    if printSummary:
        self.decoder.summary()

    self.autoencoder = Model(inputs, self.decoder(self.encoder(inputs)),
                             name='autoencoder')
    if printSummary:
        self.autoencoder.summary()

    if self.pams['loss'] == "weightedMSE":
        self.autoencoder.compile(loss=self.weightedMSE, optimizer='adam')
        self.encoder.compile(loss=self.weightedMSE, optimizer='adam')
    elif self.pams['loss'] != '':
        self.autoencoder.compile(loss=self.pams['loss'], optimizer='adam')
        self.encoder.compile(loss=self.pams['loss'], optimizer='adam')
    else:
        self.autoencoder.compile(loss='mse', optimizer='adam')
        self.encoder.compile(loss='mse', optimizer='adam')

    CNN_layers = ''
    if len(CNN_layer_nodes) > 0:
        CNN_layers += '_Conv'
        for i, n in enumerate(CNN_layer_nodes):
            CNN_layers += f'_{n}x{CNN_kernel_size[i]}'
            if CNN_pool[i]:
                CNN_layers += 'pooled'

    Dense_layers = ''
    if len(Dense_layer_nodes) > 0:
        Dense_layers += '_Dense'
        for n in Dense_layer_nodes:
            Dense_layers += f'_{n}'

    self.name = f'Autoencoded{CNN_layers}{Dense_layers}_Encoded_{encoded_dim}'

    if not self.weights_f == '':
        self.autoencoder.load_weights(self.weights_f)
def test_qnetwork():
    x = x_in = Input((28, 28, 1), name='input')
    x = QSeparableConv2D(32, (2, 2), strides=(2, 2),
                         depthwise_quantizer="binary",
                         pointwise_quantizer=quantized_bits(4, 0, 1),
                         depthwise_activation=quantized_bits(6, 2, 1),
                         bias_quantizer=quantized_bits(4, 0, 1),
                         name='conv2d_0_m')(x)
    x = QActivation('quantized_relu(6,2,1)', name='act0_m')(x)
    x = QConv2D(64, (3, 3), strides=(2, 2),
                kernel_quantizer="ternary",
                bias_quantizer=quantized_bits(4, 0, 1),
                name='conv2d_1_m',
                activation=quantized_relu(6, 3, 1))(x)
    x = QConv2D(64, (2, 2), strides=(2, 2),
                kernel_quantizer=quantized_bits(6, 2, 1),
                bias_quantizer=quantized_bits(4, 0, 1),
                name='conv2d_2_m')(x)
    x = QActivation('quantized_relu(6,4,1)', name='act2_m')(x)
    x = Flatten(name='flatten')(x)
    x = QDense(10,
               kernel_quantizer=quantized_bits(6, 2, 1),
               bias_quantizer=quantized_bits(4, 0, 1),
               name='dense')(x)
    x = Activation('softmax', name='softmax')(x)
    model = Model(inputs=[x_in], outputs=[x])

    # Reload the model to ensure saving/loading works.
    json_string = model.to_json()
    clear_session()
    model = quantized_model_from_json(json_string)

    # Generate the same output for weights.
    np.random.seed(42)
    for layer in model.layers:
        all_weights = []
        for i, weights in enumerate(layer.get_weights()):
            input_size = np.prod(layer.input.shape.as_list()[1:])
            if input_size is None:
                input_size = 576 * 10  # to avoid learning sizes
            shape = weights.shape
            assert input_size > 0, 'input size for {} {}'.format(layer.name, i)
            # He-normal initialization with a scale factor of 2.0.
            all_weights.append(
                10.0 * np.random.normal(0.0, np.sqrt(2.0 / input_size), shape))
        if all_weights:
            layer.set_weights(all_weights)

    # Apply the quantizers to the weights.
    model_save_quantized_weights(model)

    all_weights = []
    for layer in model.layers:
        for i, weights in enumerate(layer.get_weights()):
            w = np.sum(weights)
            all_weights.append(w)
    all_weights = np.array(all_weights)

    # test_qnetwork_weight_quantization
    all_weights_signature = np.array(
        [2., -6.75, -0.625, -2., -0.25, -56., 1.125, -1.625, -1.125])
    assert all_weights.size == all_weights_signature.size
    assert np.all(all_weights == all_weights_signature)

    # test_qnetwork_forward:
    expected_output = np.array([
        [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00],
        [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00],
        [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 6.e-08, 1.e+00],
        [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00],
        [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00],
        [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 5.e-07, 1.e+00],
        [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00],
        [0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00],
        [0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00],
        [0.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00]
    ]).astype(np.float16)

    inputs = 2 * np.random.rand(10, 28, 28, 1)
    actual_output = model.predict(inputs).astype(np.float16)
    assert_allclose(actual_output, expected_output, rtol=1e-4)
def test_qenergy():
    x = x_in = keras.layers.Input((784,), name="input")
    x = QDense(300,
               kernel_quantizer=quantizers.binary(),
               bias_quantizer=quantizers.binary(),
               name="d0")(x)
    x = QActivation("quantized_relu(4,0)", name="d0_qr4")(x)
    x = QDense(100,
               kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
               bias_quantizer=quantizers.quantized_bits(4, 0, 1),
               name="d1")(x)
    x = QAdaptiveActivation("quantized_relu", 4, name="d1_qr4")(x)
    x = QDense(10,
               kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
               bias_quantizer=quantizers.quantized_bits(4, 0, 1),
               name="d2")(x)
    x = keras.layers.Activation("softmax", name="softmax")(x)
    model = keras.Model(inputs=[x_in], outputs=[x])
    # print(model.summary())

    reference_internal = "int8"
    reference_accumulator = "int32"

    # Get the reference energy cost.
    q = run_qtools.QTools(model,
                          process="horowitz",
                          source_quantizers=reference_internal,
                          is_inference=False,
                          weights_path=None,
                          keras_quantizer=reference_internal,
                          keras_accumulator=reference_accumulator,
                          for_reference=True)
    ref_energy_dict = q.pe(weights_on_memory="sram",
                           activations_on_memory="sram",
                           min_sram_size=8 * 16 * 1024 * 1024,
                           rd_wr_on_io=False)
    reference_size = q.extract_energy_sum(qtools_settings.cfg.include_energy,
                                          ref_energy_dict)

    # Get the trial energy cost.
    q = run_qtools.QTools(model,
                          process="horowitz",
                          source_quantizers=reference_internal,
                          is_inference=False,
                          weights_path=None,
                          keras_quantizer=reference_internal,
                          keras_accumulator=reference_accumulator,
                          for_reference=False)
    trial_energy_dict = q.pe(weights_on_memory="sram",
                             activations_on_memory="sram",
                             min_sram_size=8 * 16 * 1024 * 1024,
                             rd_wr_on_io=False)
    trial_size = q.extract_energy_sum(qtools_settings.cfg.include_energy,
                                      trial_energy_dict)

    # The reference energy number is now updated with keras_accumulator as
    # the output quantizer.
    tmp = ref_energy_dict["d0"]["energy"]
    assert tmp["inputs"] == pytest.approx(372.77, abs=0.1)
    assert tmp["outputs"] == pytest.approx(570.57, abs=0.1)
    assert tmp["parameters"] == pytest.approx(111975.96, abs=0.1)
    assert tmp["op_cost"] == pytest.approx(70560.0, abs=0.1)

    tmp = ref_energy_dict["d1"]["energy"]
    assert tmp["inputs"] == pytest.approx(570.57, abs=0.1)
    assert tmp["outputs"] == pytest.approx(190.19, abs=0.1)
    assert tmp["parameters"] == pytest.approx(14313.66, abs=0.1)
    assert tmp["op_cost"] == pytest.approx(26500.0, abs=0.1)

    tmp = ref_energy_dict["d2"]["energy"]
    assert tmp["inputs"] == pytest.approx(190.19, abs=0.1)
    assert tmp["outputs"] == pytest.approx(19.02, abs=0.1)
    assert tmp["parameters"] == pytest.approx(483.08, abs=0.1)
    assert tmp["op_cost"] == pytest.approx(883.33, abs=0.1)

    # Trial
    tmp = trial_energy_dict["d0"]["energy"]
    assert tmp["inputs"] == pytest.approx(372.77, abs=0.1)
    assert tmp["outputs"] == pytest.approx(342.34, abs=0.1)
    assert tmp["parameters"] == pytest.approx(13997.95, abs=0.1)
    assert tmp["op_cost"] == pytest.approx(15729.0, abs=0.1)

    tmp = trial_energy_dict["d1"]["energy"]
    assert tmp["inputs"] == pytest.approx(72.27, abs=0.1)
    assert tmp["outputs"] == pytest.approx(110.31, abs=0.1)
    assert tmp["parameters"] == pytest.approx(7158.73, abs=0.1)
    assert tmp["op_cost"] == pytest.approx(3250.0, abs=0.1)

    tmp = trial_energy_dict["d2"]["energy"]
    assert tmp["inputs"] == pytest.approx(26.63, abs=0.1)
    assert tmp["outputs"] == pytest.approx(11.41, abs=0.1)
    assert tmp["parameters"] == pytest.approx(243.44, abs=0.1)
    assert tmp["op_cost"] == pytest.approx(102.08, abs=0.1)

    # print(ref_energy_dict)
    # print(trial_energy_dict)
    assert int(reference_size) == 226629
    assert int(trial_size) == 41070
def test_qnetwork():
    x = x_in = Input((28, 28, 1), name='input')
    x = QSeparableConv2D(32, (2, 2), strides=(2, 2),
                         depthwise_quantizer=binary(),
                         pointwise_quantizer=quantized_bits(4, 0, 1),
                         depthwise_activation=quantized_bits(6, 2, 1),
                         bias_quantizer=quantized_bits(4, 0, 1),
                         name='conv2d_0_m')(x)
    x = QActivation('quantized_relu(6,2,1)', name='act0_m')(x)
    x = QConv2D(64, (3, 3), strides=(2, 2),
                kernel_quantizer=ternary(),
                bias_quantizer=quantized_bits(4, 0, 1),
                name='conv2d_1_m')(x)
    x = QActivation('quantized_relu(6, 3, 1)', name='act1_m')(x)
    x = QConv2D(64, (2, 2), strides=(2, 2),
                kernel_quantizer=quantized_bits(6, 2, 1),
                bias_quantizer=quantized_bits(4, 0, 1),
                name='conv2d_2_m')(x)
    x = QActivation('quantized_relu(6,4,1)', name='act2_m')(x)
    x = Flatten(name='flatten')(x)
    x = QDense(10,
               kernel_quantizer=quantized_bits(6, 2, 1),
               bias_quantizer=quantized_bits(4, 0, 1),
               name='dense')(x)
    x = Activation('softmax', name='softmax')(x)
    model = Model(inputs=[x_in], outputs=[x])

    # Generate the same output for weights.
    np.random.seed(42)
    for layer in model.layers:
        all_weights = []
        for i, weights in enumerate(layer.get_weights()):
            input_size = np.prod(layer.input.shape.as_list()[1:])
            if input_size is None:
                input_size = 576 * 10  # hack to avoid learning sizes
            shape = weights.shape
            assert input_size > 0, 'input size for {} {}'.format(layer.name, i)
            # He-normal initialization with a scale factor of 2.0.
            all_weights.append(
                10.0 * np.random.normal(0.0, np.sqrt(2.0 / input_size), shape))
        if all_weights:
            layer.set_weights(all_weights)

    # Apply the quantizers to the weights.
    model_save_quantized_weights(model)

    all_weights = []
    for layer in model.layers:
        for i, weights in enumerate(layer.get_weights()):
            w = np.sum(weights)
            all_weights.append(w)
    all_weights = np.array(all_weights)

    # test_qnetwork_weight_quantization
    all_weights_signature = np.array(
        [2.0, -6.75, -0.625, -2.0, -0.25, -56.0, 1.125, -2.625, -0.75])
    assert all_weights.size == all_weights_signature.size
    assert np.all(all_weights == all_weights_signature)

    # test_qnetwork_forward:
    y = np.array([
        [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 5.341e-02,
         9.468e-01, 0.000e+00, 0.000e+00, 0.000e+00],
        [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 5.960e-08, 0.000e+00,
         1.919e-01, 0.000e+00, 0.000e+00, 8.081e-01],
        [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 2.378e-04, 0.000e+00,
         0.000e+00, 0.000e+00, 2.843e-05, 9.995e-01],
        [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
         1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00],
        [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
         1.000e+00, 0.000e+00, 2.623e-06, 0.000e+00],
        [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 7.749e-07,
         0.000e+00, 0.000e+00, 1.634e-04, 1.000e+00],
        [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
         1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00],
        [0.000e+00, 1.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
         6.557e-07, 0.000e+00, 0.000e+00, 0.000e+00],
        [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 1.000e+00, 0.000e+00,
         5.960e-08, 0.000e+00, 0.000e+00, 0.000e+00],
        [0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 9.125e-03, 9.907e-01,
         9.418e-06, 0.000e+00, 5.597e-05, 0.000e+00]
    ]).astype(np.float16)

    inputs = 2 * np.random.rand(10, 28, 28, 1)
    p = model.predict(inputs).astype(np.float16)
    assert np.all(p == y)
))

hidden_layers = Reshape((n_input_features,), name='reshaped_hidden')(inputs)
for l in lays[1:-3]:
    hidden_layers = l(hidden_layers)

phi_weights = lays[-3].get_weights()
eta_weights = lays[-2].get_weights()
pt_weights = lays[-1].get_weights()
combined_W = np.concatenate((phi_weights[0], eta_weights[0], pt_weights[0]), -1)
combined_b = np.concatenate((phi_weights[1], eta_weights[1], pt_weights[1]), -1)

new_output_layer = QDense(3, activation='linear',
                          weights=[combined_W, combined_b],
                          name='output3')(hidden_layers)

inference_model = Model(inputs=inputs, outputs=[new_output_layer])
inference_model.summary()

# In[8]:

# doesn't work --> because of tf version
import keras2onnx as k2o
import tf2onnx

onnx_model = k2o.convert_keras(model, model.name)
k2o.save_model(onnx_model, 'models/qkeras_models/finaly.onnx')

# In[ ]: