def get_config(quant_target=base_config.QuantTarget.weights_and_auto_acts):
  """Gets Resnet50 config for 4 bit weights and auto activation quantization.

  conv_init and the last dense layer are kept at 8 bits (rather than 4) as
  these are the most sensitive layers in the model.

  Args:
    quant_target: quantization target, of type QuantTarget.

  Returns:
    ConfigDict instance.
  """
  config = base_config.get_config(
      imagenet_type=base_config.ImagenetType.resnet50,
      quant_target=quant_target)
  config.weight_prec = 4
  config.quant_act.prec = 4
  config.model_hparams.conv_init.weight_prec = 8
  config.model_hparams.conv_init.quant_act.prec = 8
  config.model_hparams.dense_layer.weight_prec = 8
  config.model_hparams.dense_layer.quant_act.prec = 8
  return config
def get_config(quant_target=base_config.QuantTarget.weights_only):
  """Gets Resnet50 config for 4 bit weights-only quantization.

  conv_init and the last dense layer are kept at 8 bits as these are the most
  sensitive layers in the model.

  Args:
    quant_target: quantization target, of type QuantTarget.

  Returns:
    ConfigDict instance.
  """
  config = base_config.get_config(
      imagenet_type=base_config.ImagenetType.resnet50,
      quant_target=quant_target)
  config.weight_prec = 4
  config.model_hparams.conv_init.weight_prec = 8
  config.model_hparams.dense_layer.weight_prec = 8
  return config
def get_config(): """Returns sweep configuration (see module docstring).""" sweep_config = ml_collections.ConfigDict() base_config_dict = base_config.get_config( imagenet_type=base_config.ImagenetType.resnet50, quant_target=base_config.QuantTarget.weights_only) configs = [] for half_shift in [False, True]: for prec in [1, 2, 3]: config = copy.deepcopy(base_config_dict) config.weight_prec = prec config.model_hparams.conv_init.weight_prec = 8 config.model_hparams.dense_layer.weight_prec = 8 config.half_shift = half_shift configs.append(config) sweep_config.configs = configs return sweep_config
def get_config(quant_target=base_config.QuantTarget.none):
  """Gets Resnet152 config.

  Args:
    quant_target: quantization target, of type QuantTarget. Defaults to no
      quantization.

  Returns:
    ConfigDict instance.
  """
  return base_config.get_config(
      imagenet_type=base_config.ImagenetType.resnet152,
      quant_target=quant_target)
def get_config(quant_target=base_config.QuantTarget.weights_only):
  """Gets Resnet50 config for 8 bit weights-only quantization.

  Args:
    quant_target: quantization target, of type QuantTarget.

  Returns:
    ConfigDict instance.
  """
  config = base_config.get_config(
      imagenet_type=base_config.ImagenetType.resnet50,
      quant_target=quant_target)
  config.weight_prec = 8
  return config
def get_config(quant_target=base_config.QuantTarget.weights_and_auto_acts):
  """Gets Resnet50 sweep config for 8 bit weights and 1 bit auto activation quantization.

  conv_init and the last dense layer are not quantized as these are the most
  sensitive layers in the model.

  Args:
    quant_target: quantization target, of type QuantTarget.

  Returns:
    ConfigDict instance with a `configs` list of sweep entries.
  """

  def set_init_bound_coeff(field):
    # Input should be a class field so that the changes in this function
    # will be global to the class even without a return value.
    field.cams_coeff = 0.0
    field.cams_stddev_coeff = 0.0
    field.mean_of_max_coeff = 0.0
    field.stddev_coeff = 0.0
    field.absdev_coeff = 0.0
    field.fixed_bound = 0.0
    field.granularity = "per_channel"
    field.use_old_code = False

  def reset_bound_for_convinit_dense(config):
    # Reset bound hparams for conv_init and dense layers;
    # use mean_of_max to automatically calculate the bound values.
    set_init_bound_coeff(config.model_hparams.dense_layer.quant_act.bounds)
    config.model_hparams.dense_layer.quant_act.bounds.initial_bound = -1
    config.model_hparams.dense_layer.quant_act.bounds.mean_of_max_coeff = 1.0
    set_init_bound_coeff(config.model_hparams.conv_init.quant_act.bounds)
    config.model_hparams.conv_init.quant_act.bounds.initial_bound = -1
    config.model_hparams.conv_init.quant_act.bounds.mean_of_max_coeff = 1.0
    return config

  # Create an init config which the sweep configs will be based on.
  config_init = base_config.get_config(
      imagenet_type=base_config.ImagenetType.resnet50,
      quant_target=quant_target)
  config_init.weight_prec = 8
  config_init.quant_act.prec = 1
  config_init.half_shift = True
  config_init.base_learning_rate = 2e-5
  config_init.activation_bound_start_step = 7500

  # Set act function and shortcut method for each conv layer.
  config_init.act_function = "none"
  config_init.shortcut_ch_shrink_method = "none"
  config_init.shortcut_ch_expand_method = "none"
  config_init.shortcut_spatial_method = "none"

  # Set learning rate scheduler.
  config_init.lr_scheduler.num_epochs = 250
  config_init.lr_scheduler.warmup_epochs = 5
  config_init.lr_scheduler.cooldown_epochs = 0
  config_init.lr_scheduler.scheduler = "linear"

  # -1 means no early stopping by default.
  config_init.early_stop_steps = -1

  # Optimizer params.
  config_init.optimizer = "adam"
  config_init.adam.beta1 = 0.9
  config_init.adam.beta2 = 0.999

  # Conv_init and dense layers will have floating-point weights and acts.
  config_init.model_hparams.conv_init.weight_prec = None
  config_init.model_hparams.conv_init.quant_act.prec = None
  config_init.model_hparams.dense_layer.weight_prec = None
  config_init.model_hparams.dense_layer.quant_act.prec = None

  # Set all of the input distributions to "symmetric".
  config_init.model_hparams.dense_layer.quant_act.input_distribution = "symmetric"
  config_init.model_hparams.conv_init.quant_act.input_distribution = "symmetric"
  for residual_block in config_init.model_hparams.residual_blocks:
    residual_block.conv_1.quant_act.input_distribution = "symmetric"
    residual_block.conv_2.quant_act.input_distribution = "symmetric"
    residual_block.conv_3.quant_act.input_distribution = "symmetric"
    if residual_block.conv_proj is not None:
      residual_block.conv_proj.quant_act.input_distribution = "symmetric"

  # Set bound hparams to all zero for activations;
  # one of the bound hparams at a time is updated in the sweep configs.
  set_init_bound_coeff(config_init.quant_act.bounds)

  # Set initial bound value.
  config_init.quant_act.bounds.initial_bound = 2.0

  # Name of the experiment on TB.
  config_init.metadata.hyper_str = "w8a1"

  # Create a collection of config files for sweeping.
  sweep_config = ml_collections.ConfigDict()
  configs = []

  # Leaderboard configs for testing purposes.
  configs.append(bfloat16_paper_config())
  configs.append(w8a8auto_paper_config())
  configs.append(w4a4auto_paper_config())

  # Baseline: act_function in [none, bprelu], no additional shortcuts.
  for act_function in ["none", "bprelu"]:
    for fix_bound in [3.0]:
      config = copy.deepcopy(config_init)
      config.act_function = act_function
      config.quant_act.bounds.fixed_bound = fix_bound
      # Reset bound hparams for conv_init and dense layers.
      config = reset_bound_for_convinit_dense(config)
      config.metadata.hyper_str += f"_{act_function}_baseline"
      configs.append(config)

  # Turn on sc1, sc2, sc3.
  # Sweep sc1, sc3 both with different methods.
  for act_function in ["bprelu"]:
    for fix_bound in [3.0]:
      for shortcut_shrink_method in ["consecutive"]:
        for shortcut_expand_method in ["zeropad"]:
          config = copy.deepcopy(config_init)
          config.act_function = act_function
          config.quant_act.bounds.fixed_bound = fix_bound
          config.shortcut_ch_shrink_method = shortcut_shrink_method
          config.shortcut_ch_expand_method = shortcut_expand_method
          config.shortcut_spatial_method = "max_pool"
          # Reset bound hparams for conv_init and dense layers.
          config = reset_bound_for_convinit_dense(config)
          config.metadata.hyper_str += (
              f"_{act_function}_sc123_shrink_{shortcut_shrink_method}"
              f"_expand_{shortcut_expand_method}")
          configs.append(config)

  sweep_config.configs = configs
  return sweep_config
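# Hypothetical helper sketch, an assumption rather than part of the original
# w8a1 sweep file: selecting a single entry by its TensorBoard name, e.g. to
# reproduce one run of the sweep locally. Assumes every entry, including the
# leaderboard configs, carries metadata.hyper_str.
def select_config_by_name(hyper_str):
  """Returns the first sweep entry whose metadata.hyper_str matches."""
  sweep_config = get_config()
  for config in sweep_config.configs:
    if config.metadata.hyper_str == hyper_str:
      return config
  raise ValueError(f"no sweep entry named {hyper_str!r}")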