def load_quantized_model(model, ckpt_path, session, name):
  """Loads a quantized model and dequantizes its variables.

  Assumes every trainable variable has companion variables named
  "quantized", "min_range" and "max_range" in its own scope. `utils` and
  `print_variables_in_ckpt` are project-local helpers.
  """
  start_time = time.time()

  # Build one dequantize-and-assign op per trainable variable.
  dequant_ops = []
  for tsr in tf.trainable_variables():
    with tf.variable_scope(tsr.name.split(":")[0], reuse=True):
      quant_tsr = tf.get_variable("quantized", dtype=tf.qint8)
      min_range = tf.get_variable("min_range")
      max_range = tf.get_variable("max_range")
      dequant_ops.append(
          tsr.assign(tf.dequantize(quant_tsr, min_range, max_range, "SCALED")))

  # Restore only the non-trainable variables from the checkpoint; the
  # trainable ones are reconstructed from their quantized companions.
  restore_list = [tsr for tsr in tf.global_variables()
                  if tsr not in tf.trainable_variables()]
  saver = tf.train.Saver(restore_list)
  try:
    saver.restore(session, ckpt_path)
  except tf.errors.NotFoundError as e:
    utils.print_out("Can't load checkpoint")
    print_variables_in_ckpt(ckpt_path)
    utils.print_out("%s" % str(e))

  session.run(tf.tables_initializer())
  session.run(dequant_ops)
  utils.print_out(
      " loaded %s model parameters from %s, time %.2fs" %
      (name, ckpt_path, time.time() - start_time))
  return model
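# A minimal, self-contained sketch (not part of the loader above) of the
# "SCALED" dequantize call it relies on; the qint8 codes and the [-1, 1]
# range are illustrative assumptions. In SCALED mode the range is treated
# symmetrically for signed types, so each qint8 code maps to roughly
# code * max(|min_range|, |max_range|) / 127.
import numpy as np
import tensorflow as tf

codes = tf.constant(np.array([-127, 0, 127], dtype=np.int8),
                    shape=[3], dtype=tf.qint8)
deq = tf.dequantize(codes, -1.0, 1.0, mode="SCALED")
with tf.Session() as sess:
  print(sess.run(deq))  # approximately [-1., 0., 1.]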
def _testDequantizeOp(self, inputs, min_range, max_range, dtype):
  with self.test_session():
    input_op = tf.constant(inputs, shape=[len(inputs)], dtype=dtype)
    dequantized = tf.dequantize(input_op, min_range, max_range)
    tf_ans = dequantized.eval()

  # TODO(vrv): Add support for DT_QINT32 quantization if needed.
  type_dict = {
      tf.quint8: np.uint8,
      tf.qint8: np.int8,
      tf.quint16: np.uint16,
      tf.qint16: np.int16
  }
  self.assertTrue(dtype in type_dict.keys())

  v_max = np.iinfo(type_dict[dtype]).max
  v_min = np.iinfo(type_dict[dtype]).min
  self.assertTrue(min_range >= v_min)
  self.assertTrue(max_range <= v_max)
  type_range = v_max - v_min
  if v_min < 0:
    half_range = (type_range + 1) / 2
  else:
    half_range = 0.0

  np_ans = ((inputs.astype(np.float32) + half_range) *
            (max_range - min_range) / type_range) + min_range
  self.assertAllClose(tf_ans, np_ans)
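# A small worked instance of the reference formula above, with illustrative
# values: quint8 is unsigned, so half_range is 0 and the mapping reduces to
# out = in * (max_range - min_range) / 255 + min_range.
import numpy as np

inputs = np.array([0, 128, 255], dtype=np.uint8)
out = inputs.astype(np.float32) * (6.0 - 0.0) / 255 + 0.0
print(out)  # [0., ~3.0118, 6.]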
def _add_assign_add():
  scale = tf.placeholder("double", name="pok/update/scale")
  typedScales = {
      scale.dtype: scale,
  }
  group_assign = []
  group_assign_add = []
  group_assign_add_quantized = []
  for v in tf.trainable_variables():
    # Input placeholder for this variable, plus the scale cast to its dtype
    # (casts are cached in typedScales so each dtype is cast only once).
    name = "pok/update/var/" + v.name.replace(':', '/')
    vin = tf.placeholder(v.dtype, v.shape, name=name)
    typedScale = typedScales.get(v.dtype)
    if typedScale is None:
      typedScale = tf.cast(scale, v.dtype)
      typedScales[v.dtype] = typedScale
    group_assign.append(v.assign(vin))
    assign_add = v.assign_add(vin * typedScale)
    group_assign_add.append(assign_add)

    # Non-float32 variables skip quantization and reuse the plain assign_add.
    if v.dtype.base_dtype != tf.float32:
      group_assign_add_quantized.append(assign_add)
      continue

    # Quantized output weights: the quantize node stays in the graph under a
    # well-known name so the weights can be exported in quantized form.
    flat_name = v.name.replace(':', '/')
    name = "pok/quant/out/" + flat_name
    minv = tf.reduce_min(v, name=name + '/min')
    maxv = tf.reduce_max(v, name=name + '/max')
    quantized_dtype = tf.quint8
    quantized = tf.quantize_v2(v, minv, maxv, quantized_dtype, name=name)

    # Quantized input weights: dequantize the fed codes, scale, and add.
    name = "pok/quant/in/" + flat_name
    inp = tf.placeholder(quantized_dtype, v.shape, name=name)
    minp = tf.placeholder(tf.float32, name=name + '/min')
    maxp = tf.placeholder(tf.float32, name=name + '/max')
    dequantized = tf.dequantize(inp, minp, maxp)
    assign_add = v.assign_add(dequantized * typedScale)
    group_assign_add_quantized.append(assign_add)

  tf.group(*group_assign, name="pok/update/assign")
  tf.group(*group_assign_add, name="pok/update/assign_add")
  tf.group(*group_assign_add_quantized, name="pok/update/assign_add_quant")
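# A hypothetical driver for the graph built by _add_assign_add; only the
# "pok/..." name strings come from the function above, everything else is an
# assumption. The grouped op runs one assign_add per variable, so the feeds
# must cover every trainable variable (assumed float32 here), and the quint8
# codes should be values previously fetched from a quantize_v2 run so they
# are already in TensorFlow's quint8 NumPy representation.
import tensorflow as tf

def apply_quantized_deltas(sess, deltas, scale):
  """deltas: {tf.Variable: (quint8_codes, min_float, max_float)}."""
  feeds = {"pok/update/scale:0": scale}
  for v, (codes, mn, mx) in deltas.items():
    base = "pok/quant/in/" + v.name.replace(':', '/')
    feeds[base + ":0"] = codes
    feeds[base + "/min:0"] = mn
    feeds[base + "/max:0"] = mx
  sess.run("pok/update/assign_add_quant", feed_dict=feeds)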
def model(x):
  variables = {}
  y = tf.dequantize(x, 0.0, 6.0, name='ys')
  return y, variables
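# Illustrative call for model() above; the input values are assumptions. The
# fixed [0.0, 6.0] range suggests ReLU6-style activations, and the default
# MIN_COMBINED mode maps each quint8 code c to c * 6.0 / 255.
import numpy as np
import tensorflow as tf

x = tf.constant(np.array([0, 85, 255], dtype=np.uint8),
                shape=[3], dtype=tf.quint8)
y, _ = model(x)
with tf.Session() as sess:
  print(sess.run(y))  # [0., 2., 6.]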