def getEnergy(model):
    """Estimate and print the baseline (reference) energy of `model`.

    Builds a QTools object in reference mode (for_reference=True), which
    overrides all user-specified quantizers with the fp32 keras_quantizer /
    keras_accumulator, runs the "horowitz" process energy model, and prints
    the per-layer energy profile plus the total energy in uJ.

    Args:
        model: a (q)keras model to analyze.

    Returns:
        The total energy as reported by QTools (the printed value divides by
        1e6 and is labeled uJ, so presumably the raw value is in pJ — confirm
        against the QTools documentation).
    """
    reference_internal = "fp32"
    reference_accumulator = "fp32"
    q = run_qtools.QTools(
        model,
        process="horowitz",
        source_quantizers=[quantized_bits(16, 6, 1)],
        is_inference=False,
        weights_path=None,
        keras_quantizer=reference_internal,
        keras_accumulator=reference_accumulator,
        # True => baseline energy: user quantizers are overridden by the
        # reference keras_quantizer/keras_accumulator above.
        for_reference=True)
    energy_dict = q.pe(
        # "fixed" storage for both weights and activations.
        weights_on_memory="fixed",
        activations_on_memory="fixed",
        # Minimum SRAM size in bits: 8 * 16 * 1024 * 1024 = a 16 MB SRAM.
        min_sram_size=8 * 16 * 1024 * 1024,
        rd_wr_on_io=False)
    energy_profile = q.extract_energy_profile(
        qtools_settings.cfg.include_energy, energy_dict)
    total_energy = q.extract_energy_sum(
        qtools_settings.cfg.include_energy, energy_dict)
    pprint.pprint(energy_profile)
    print()
    print("Total energy: {:.2f} uJ".format(total_energy / 1000000.0))
    # Fix: previously the computed total was printed and then discarded;
    # returning it lets callers use the estimate programmatically.
    return total_energy
def GetEnergy(model, verbose=False):
    """Estimate the baseline (reference) energy of `model`.

    Builds a QTools object in reference mode (for_reference=True) using the
    "horowitz" process model, with weights and activations assumed to live
    in SRAM.

    Args:
        model: a (q)keras model to analyze.
        verbose: if True, pretty-print the per-layer energy profile and the
            total energy (in uJ).

    Returns:
        The total energy as reported by QTools (the printed value divides by
        1e6 and is labeled uJ, so presumably the raw value is in pJ — confirm
        against the QTools documentation).
    """
    # energy estimation
    reference_internal = "fp32"
    reference_accumulator = "fp32"
    proc = "horowitz"
    q = run_qtools.QTools(
        model,
        process=proc,
        source_quantizers=[quantized_bits(8, 0, 1)],
        is_inference=False,
        weights_path=None,
        keras_quantizer=reference_internal,
        keras_accumulator=reference_accumulator,
        # whether to calculate baseline energy
        for_reference=True)
    energy_dict = q.pe(
        weights_on_memory="sram",
        activations_on_memory="sram",
        # minimum sram size in number of bits. Let's assume a 16MB SRAM.
        min_sram_size=8 * 16 * 1024 * 1024,
        # assuming data already in SRAM
        rd_wr_on_io=False)
    energy_profile = q.extract_energy_profile(
        qtools_settings.cfg.include_energy, energy_dict)
    total_energy = q.extract_energy_sum(
        qtools_settings.cfg.include_energy, energy_dict)
    if verbose:
        pprint.pprint(energy_profile)
        print()
        print("Total energy: {:.2f} uJ".format(total_energy / 1000000.0))
    # Fix: previously ended with a bare `return`, discarding the result.
    return total_energy
def get_trial(self, model):
    """Compute the trial (quantized) energy cost of `model`.

    Runs QTools with the user-specified quantizers (for_reference=False),
    caches the raw energy dict, the summed energy and the per-layer profile
    on the instance, and returns the summed energy.
    """
    qtools = run_qtools.QTools(
        model,
        process=self.process,
        source_quantizers=self.source_quantizers,
        is_inference=self.trained_model,
        weights_path=None,
        keras_quantizer=self.reference_internal,
        keras_accumulator=self.reference_accumulator,
        for_reference=False)
    trial_energy = qtools.pe(
        weights_on_memory=self.parameters_on_memory[1],
        activations_on_memory=self.activations_on_memory[1],
        min_sram_size=self.min_sram_size[1],
        rd_wr_on_io=self.rd_wr_on_io[1])
    include = qtools_settings.cfg.include_energy
    self.trial_energy_dict = trial_energy
    self.trial_size = qtools.extract_energy_sum(include, trial_energy)
    self.trial_energy_profile = qtools.extract_energy_profile(
        include, trial_energy)
    return self.trial_size
def get_reference(self, model):
    """Compute (once) the baseline energy of `model`, scaled by stress.

    The first call runs QTools in reference mode and caches the result on
    the instance; subsequent calls return the cached value immediately.
    """
    # Reference energy only needs to be computed once per instance.
    if self.reference_size is not None:
        return self.reference_size * self.stress
    # NOTE(review): source_quantizers here is self.reference_internal, while
    # get_trial uses self.source_quantizers — plausibly intentional for a
    # baseline run, but worth confirming.
    qtools = run_qtools.QTools(
        model,
        process=self.process,
        source_quantizers=self.reference_internal,
        is_inference=self.trained_model,
        weights_path=None,
        keras_quantizer=self.reference_internal,
        keras_accumulator=self.reference_accumulator,
        for_reference=True)
    ref_energy = qtools.pe(
        weights_on_memory=self.parameters_on_memory[0],
        activations_on_memory=self.activations_on_memory[0],
        min_sram_size=self.min_sram_size[0],
        rd_wr_on_io=self.rd_wr_on_io[0])
    include = qtools_settings.cfg.include_energy
    self.ref_energy_dict = ref_energy
    self.reference_size = qtools.extract_energy_sum(include, ref_energy)
    self.reference_energy_profile = qtools.extract_energy_profile(
        include, ref_energy)
    return self.reference_size * self.stress
def get_qenergy(model, qenergy_config, for_reference):
    """Build a QTools object for `model` and compute its total energy.

    Args:
        model: model to analyze.
        qenergy_config: dict providing the keys "process",
            "reference_internal", "trained_model", "reference_accumulator",
            "parameters_on_memory", "activations_on_memory", "min_sram_size"
            and "rd_wr_on_io".
        for_reference: True to compute the baseline (reference) energy,
            False to compute the trial energy.

    Returns:
        A (qtools_obj, total_energy) tuple.
    """
    cfg = qenergy_config
    qtools_obj = run_qtools.QTools(
        model,
        process=cfg["process"],
        source_quantizers=cfg["reference_internal"],
        is_inference=cfg["trained_model"],
        weights_path=None,
        keras_quantizer=cfg["reference_internal"],
        keras_accumulator=cfg["reference_accumulator"],
        for_reference=for_reference)
    # Calculate energy of the derived data-type map.
    energy_dict = qtools_obj.pe(
        weights_on_memory=cfg["parameters_on_memory"],
        activations_on_memory=cfg["activations_on_memory"],
        min_sram_size=cfg["min_sram_size"],
        rd_wr_on_io=cfg["rd_wr_on_io"])
    total_energy = qtools_obj.extract_energy_sum(
        qtools_settings.cfg.include_energy, energy_dict)
    return qtools_obj, total_energy
reference_accumulator = "int32" # By setting for_reference=True, we create QTools object which uses # keras_quantizer to quantize weights/bias and # keras_accumulator to quantize MAC variables for all layers. Obviously, this # overwrites any quantizers that user specified in the qkeras layers. The # purpose of doing so is to enable user to calculate a baseline energy number # for a given model architecture and compare it against quantized models. q = run_qtools.QTools( model, # energy calculation using a given process process="horowitz", # quantizers for model input source_quantizers=[quantizers.quantized_bits(8, 0, 1)], is_inference=False, # absolute path (including filename) of the model weights weights_path=None, # keras_quantizer to quantize weight/bias in un-quantized keras layers keras_quantizer=reference_internal, # keras_quantizer to quantize MAC in un-quantized keras layers keras_accumulator=reference_accumulator, # whether calculate baseline energy for_reference=True) # caculate energy of the derived data type map. ref_energy_dict = q.pe( # whether to store parameters in dram, sram, or fixed weights_on_memory="sram", # store activations in dram or sram activations_on_memory="sram", # minimum sram size in number of bits
def test_qenergy():
    """End-to-end check of QTools reference vs. trial energy estimates."""
    x = x_in = keras.layers.Input((784,), name="input")
    x = QDense(
        300,
        kernel_quantizer=quantizers.binary(),
        bias_quantizer=quantizers.binary(),
        name="d0")(x)
    x = QActivation("quantized_relu(4,0)", name="d0_qr4")(x)
    x = QDense(
        100,
        kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
        bias_quantizer=quantizers.quantized_bits(4, 0, 1),
        name="d1")(x)
    x = QAdaptiveActivation("quantized_relu", 4, name="d1_qr4")(x)
    x = QDense(
        10,
        kernel_quantizer=quantizers.quantized_bits(4, 0, 1),
        bias_quantizer=quantizers.quantized_bits(4, 0, 1),
        name="d2")(x)
    x = keras.layers.Activation("softmax", name="softmax")(x)
    model = keras.Model(inputs=[x_in], outputs=[x])

    reference_internal = "int8"
    reference_accumulator = "int32"

    def run_pe(for_reference):
        # Run the energy estimate either for the reference baseline
        # (for_reference=True) or for the quantized trial (False).
        qtools_obj = run_qtools.QTools(
            model,
            process="horowitz",
            source_quantizers=reference_internal,
            is_inference=False,
            weights_path=None,
            keras_quantizer=reference_internal,
            keras_accumulator=reference_accumulator,
            for_reference=for_reference)
        energy = qtools_obj.pe(
            weights_on_memory="sram",
            activations_on_memory="sram",
            min_sram_size=8 * 16 * 1024 * 1024,
            rd_wr_on_io=False)
        total = qtools_obj.extract_energy_sum(
            qtools_settings.cfg.include_energy, energy)
        return energy, total

    # Get reference and trial energy costs.
    ref_energy_dict, reference_size = run_pe(True)
    trial_energy_dict, trial_size = run_pe(False)

    # Reference energy numbers are updated with keras_accumulator as the
    # output quantizer.
    expected = {
        "reference": {
            "d0": {"inputs": 372.77, "outputs": 570.57,
                   "parameters": 111975.96, "op_cost": 70560.0},
            "d1": {"inputs": 570.57, "outputs": 190.19,
                   "parameters": 14313.66, "op_cost": 26500.0},
            "d2": {"inputs": 190.19, "outputs": 19.02,
                   "parameters": 483.08, "op_cost": 883.33},
        },
        "trial": {
            "d0": {"inputs": 372.77, "outputs": 342.34,
                   "parameters": 13997.95, "op_cost": 15729.0},
            "d1": {"inputs": 72.27, "outputs": 110.31,
                   "parameters": 7158.73, "op_cost": 3250.0},
            "d2": {"inputs": 26.63, "outputs": 11.41,
                   "parameters": 243.44, "op_cost": 102.08},
        },
    }
    for kind, energy_dict in (("reference", ref_energy_dict),
                              ("trial", trial_energy_dict)):
        for layer_name, fields in expected[kind].items():
            layer_energy = energy_dict[layer_name]["energy"]
            for field, value in fields.items():
                assert layer_energy[field] == pytest.approx(value, abs=0.1)

    assert int(reference_size) == 226629
    assert int(trial_size) == 41070
layer_name = layer.__class__.__name__ parameters = aq._param_size(layer) activations = aq._act_size(layer) print("Parameters {}:{}".format(layer.name,parameters)) print("Activations {}:{}".format(layer.name,activations)) total_size_params += parameters total_size_acts += activations total_size, p_size, a_size, model_size_dict = aq.compute_model_size(model) flops = get_flops(model, batch_size=1) print(f"FLOPS: {flops / 10 ** 9:.03} G") q = run_qtools.QTools(model, process="horowitz", source_quantizers=[quantized_bits(16, 6, 1)], is_inference=False, weights_path=None,keras_quantizer="fp32",keras_accumulator="fp32", for_reference=False) q.qtools_stats_print() # caculate energy of the derived data type map. energy_dict = q.pe( # whether to store parameters in dram, sram, or fixed weights_on_memory="sram", # store activations in dram or sram activations_on_memory="sram", # minimum sram size in number of bits. Let's assume a 16MB SRAM. min_sram_size=8*16*1024*1024, # whether load data from dram to sram (consider sram as a cache # for dram. If false, we will assume data will be already in SRAM rd_wr_on_io=False) # get stats of energy distribution in each layer
def generate_json(in_model):
    """Example: generate the data type map for a given model.

    Args:
        in_model: qkeras model object.

    Usage:
        input_quantizer_list: A list of input quantizers for the model. It
            could be in the form of:
            1. a list of quantizers, one quantizer for each of the model
               inputs
            2. one single quantizer, which will be used for all of the model
               inputs
            3. None. The default input quantizer defined in config_xxx.py
               will be used for all of the model inputs
        for_reference: get energy for a reference model/trial model
            1. True: get baseline energy for a given model. Use
               keras_quantizer/keras_accumulator (or
               default_interm_quantizer in config_xxx.py if
               keras_quantizer/keras_accumulator not given) to quantize all
               layers in a model in order to calculate its energy. It serves
               the purpose of setting up a baseline energy for a given model
               architecture.
            2. False: get the "real" energy for a given model using
               user-specified quantizers. For layers that are not quantized
               (keras layers) or have no user-specified quantizers (qkeras
               layers without quantizers specified), keras_quantizer and
               keras_accumulator (or default_interm_quantizer in
               config_xxx.py if keras_quantizer/keras_accumulator not given)
               will be used as their quantizers.
        process: technology process to use in configuration (horowitz, ...)
        weights_path: absolute path to the model weights
        is_inference: whether the model has already been trained, which is
            needed to compute tighter bounds for QBatchNormalization power
            estimation.

    Other parameters (defined in config_xxx.py):
        1. "default_source_quantizer": used as the default input quantizer
           if the user does not specify any input quantizers.
        2. "default_interm_quantizer": used as the default quantizer for any
           intermediate variables such as multiplier, accumulator,
           weight/bias in a qkeras layer if the user does not specify the
           corresponding variable.
        3. process_name: energy calculation parameters for different
           processes. "horowitz" is the process used by default.
        4. "include_energy": which energy terms to include at each layer
           when calculating the total energy of the entire model:
           "parameters": memory access energy for loading model parameters.
           "inputs": memory access energy for reading inputs.
           "outputs": memory access energy for writing outputs.
           "op_cost": operation energy for multiplication and accumulation.
    """
    input_quantizer_list = [quantizers.quantized_bits(8, 0, 1)]
    reference_internal = "int8"
    reference_accumulator = "int32"
    # generate QTools object which contains model data type map in json format
    q = run_qtools.QTools(
        in_model,
        # energy calculation using a given process
        process="horowitz",
        # quantizers for model inputs
        source_quantizers=input_quantizer_list,
        # training or inference with a pre-trained model
        is_inference=False,
        # path to pre-trained model weights
        weights_path=None,
        # keras_quantizer to quantize weight/bias in non-quantized keras layers
        keras_quantizer=reference_internal,
        # keras_accumulator to quantize MAC in un-quantized keras layers
        keras_accumulator=reference_accumulator,
        # calculating baseline energy or not
        for_reference=False)
    # print data type map
    q.qtools_stats_print()