def before_train(self, logs=None): """Be called before the training process.""" self.input = None self.flops = None self.params = None self.latency = None self.calc_params_each_epoch = self.trainer.config.calc_params_each_epoch self.calc_latency = self.trainer.config.calc_latency if vega.is_tf_backend(): import tensorflow as tf datasets = self.trainer.valid_input_fn() data_iter = tf.compat.v1.data.make_one_shot_iterator(datasets) # data_iter = self.trainer.valid_input_fn().make_one_shot_iterator() input_data, _ = data_iter.get_next() self.input = input_data[:1] elif vega.is_torch_backend(): for batch in self.trainer.valid_loader: batch = self.trainer._set_device(batch) if isinstance(batch, dict): self.input = batch elif isinstance(batch, list) and isinstance(batch[0], dict): self.input = batch[:1] else: # classification self.input = batch[0][:1] break self.update_flops_params(logs=logs)
def __call__(self, model=None, distributed=False, **kwargs):
    """Call Optimizer class.

    :param model: model, used in torch case
    :param distributed: use distributed
    :return: optimizer
    """
    params = self.map_config.get("params", {})
    logging.debug("Call Optimizer. name={}, params={}".format(
        self.optim_cls.__name__, params))
    optimizer = None
    try:
        if vega.is_torch_backend():
            learnable_params = [
                param for param in model.parameters() if param.requires_grad
            ]
            optimizer = self.optim_cls(learnable_params, **params)
            if distributed:
                optimizer = self.set_distributed(optimizer, model)
        elif vega.is_tf_backend():
            optimizer = dynamic_optimizer(self.optim_cls, **params)
        elif vega.is_ms_backend():
            if "dynamic_lr" in kwargs:
                params.update({"learning_rate": kwargs["dynamic_lr"]})
            learnable_params = [
                param for param in model.trainable_params()
                if param.requires_grad
            ]
            optimizer = self.optim_cls(learnable_params, **params)
        return optimizer
    except Exception as ex:
        logging.error("Failed to call Optimizer name={}, params={}".format(
            self.optim_cls.__name__, params))
        raise ex
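# Usage sketch (an assumption, not taken from the source): under the PyTorch
# backend the wrapper above filters out frozen parameters before handing the
# rest to the wrapped optimizer class. A minimal standalone equivalent:
import torch
import torch.nn as nn

model = nn.Linear(8, 2)
model.bias.requires_grad = False  # frozen parameters are skipped
learnable = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(learnable, lr=0.01, momentum=0.9)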
def load_checkpoint(self, worker_id=None, step_name=None, saved_folder=None):
    """Load checkpoint."""
    if saved_folder is None:
        if worker_id is None:
            worker_id = self.worker_id
        if step_name is None:
            step_name = self.step_name
        saved_folder = self.get_local_worker_path(step_name, worker_id)
    checkpoint_file = FileOps.join_path(saved_folder, self.checkpoint_file_name)
    model_pickle_file = FileOps.join_path(saved_folder, self.model_pickle_file_name)
    try:
        with open(model_pickle_file, 'rb') as f:
            model = pickle.load(f)
            if vega.is_torch_backend():
                ckpt = torch.load(checkpoint_file,
                                  map_location=torch.device('cpu'))
                model.load_state_dict(ckpt['weight'])
                if self.config.cuda:
                    model = model.cuda()
            elif vega.is_tf_backend():
                FileOps.copy_folder(saved_folder, self.get_local_worker_path())
            self.model = model
    except Exception:
        logging.info('Checkpoint file does not exist, using the default model.')
    return
def _generate_init_model(self):
    """Generate init model by loading pretrained model.

    :return: initial model after loading pretrained model
    :rtype: torch.nn.Module
    """
    model_init = self._new_model_init()
    chn_mask = self._init_chn_node_mask()
    if vega.is_torch_backend():
        checkpoint = torch.load(self.config.init_model_file + '.pth')
        model_init.load_state_dict(checkpoint)
        model = PruneMobileNet(model_init).apply(chn_mask)
        model.to(self.device)
    elif vega.is_tf_backend():
        model = model_init
        with tf.compat.v1.Session(
                config=self.trainer._init_session_config()) as sess:
            saver = tf.compat.v1.train.import_meta_graph(
                "{}.meta".format(self.config.init_model_file))
            saver.restore(sess, self.config.init_model_file)
            all_weight = tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.VARIABLES)
            all_weight = [
                t for t in all_weight if not t.name.endswith('Momentum:0')
            ]
            PruneMobileNet(all_weight).apply(chn_mask)
            save_file = FileOps.join_path(
                self.trainer.get_local_worker_path(), 'prune_model')
            saver.save(sess, save_file)
    elif vega.is_ms_backend():
        parameter_dict = load_checkpoint(self.config.init_model_file)
        load_param_into_net(model_init, parameter_dict)
        model = PruneMobileNet(model_init).apply(chn_mask)
    return model
def eval_model_parameters(model):
    """Calculate number of parameters in million (M) for a model.

    :param model: A model
    :type model: nn.Module
    :return: The number of parameters
    :rtype: Float
    """
    if vega.is_torch_backend():
        # Builtin sum: calling np.sum on a generator is deprecated.
        return sum(v.numel() for name, v in model.named_parameters()
                   if "auxiliary" not in name) / 1e6
    elif vega.is_tf_backend():
        import tensorflow as tf
        tf.compat.v1.reset_default_graph()
        dummy_input = tf.compat.v1.placeholder(
            dtype=tf.float32,
            shape=[1, 32, 32, 3]
            if model.data_format == 'channels_last' else [1, 3, 32, 32])
        model.training = True
        model(dummy_input)
        all_weight = tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES)
        weight_op = [t for t in all_weight if "auxiliary" not in t.name]
        return np.sum([np.prod(t.get_shape().as_list())
                       for t in weight_op]) * 1e-6
    elif vega.is_ms_backend():
        return 0
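# Usage sketch (an assumption): the same counting convention applied to a
# plain PyTorch model, skipping any "auxiliary" heads as above.
import torch.nn as nn

net = nn.Sequential(nn.Conv2d(3, 16, 3), nn.ReLU(), nn.Linear(16, 10))
millions = sum(v.numel() for name, v in net.named_parameters()
               if "auxiliary" not in name) / 1e6
print("params: {:.4f}M".format(millions))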
def _to_tensor(self, data):
    """Convert data to a tensor of the active backend."""
    if vega.is_torch_backend():
        import torch
        return torch.tensor(data)
    elif vega.is_tf_backend():
        import tensorflow as tf
        return tf.convert_to_tensor(data)
def _get_data_format():
    """Return the channel layout expected by the active backend."""
    if vega.is_torch_backend() or vega.is_ms_backend():
        return 'channels_first'
    elif vega.is_tf_backend():
        return 'channels_last'
    else:
        return None
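# Example (an assumption for illustration): a batch of one 32x32 RGB image is
# [1, 3, 32, 32] (NCHW) under 'channels_first' for torch/mindspore and
# [1, 32, 32, 3] (NHWC) under 'channels_last' for tensorflow.
shape = ([1, 3, 32, 32] if _get_data_format() == 'channels_first'
         else [1, 32, 32, 3])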
def before_train(self, logs=None): """Be called before the train process.""" self.config = self.trainer.config self.device = vega.is_gpu_device() if vega.is_gpu_device( ) is not True else 0 self.base_net_desc = self.trainer.model.desc sess_config = None if vega.is_torch_backend(): if vega.is_npu_device(): count_input = torch.FloatTensor(1, 3, 32, 32).npu() elif vega.is_gpu_device(): count_input = torch.FloatTensor(1, 3, 32, 32).to(self.device) elif vega.is_tf_backend(): count_input = tf.random.uniform([1, 3, 32, 32], dtype=tf.float32) sess_config = self.trainer._init_session_config() elif vega.is_ms_backend(): count_input = mindspore.Tensor( np.random.randn(1, 3, 32, 32).astype(np.float32)) self.flops_count, self.params_count = calc_model_flops_params( self.trainer.model, count_input) self.latency_count = calc_forward_latency(self.trainer.model, count_input, sess_config) logging.info("after prune model glops=%sM, params=%sK, latency=%sms", self.flops_count * 1e-6, self.params_count * 1e-3, self.latency_count * 1000) self.trainer.model = self._generate_init_model() if vega.is_torch_backend(): self.trainer.optimizer = Optimizer()( model=self.trainer.model, distributed=self.trainer.distributed) self.trainer.lr_scheduler = LrScheduler()(self.trainer.optimizer)
def before_train(self, logs=None): """Be called before the train process.""" self.config = self.trainer.config model_code = copy.deepcopy(self.trainer.model.desc) model = self.trainer.model logging.info('current code: %s, %s', model_code.nbit_w_list, model_code.nbit_a_list) quantizer = Quantizer(model, model_code.nbit_w_list, model_code.nbit_a_list) model = quantizer() self.trainer.model = model count_input = [1, 3, 32, 32] sess_config = None if vega.is_torch_backend(): model = model.cuda() self.trainer.optimizer = Optimizer()( model=self.trainer.model, distributed=self.trainer.distributed) self.trainer.lr_scheduler = LrScheduler()(self.trainer.optimizer) count_input = torch.FloatTensor(*count_input).cuda() elif vega.is_tf_backend(): tf.compat.v1.reset_default_graph() count_input = tf.random.uniform(count_input, dtype=tf.float32) sess_config = self.trainer._init_session_config() self.flops_count, self.params_count = calc_model_flops_params( model, count_input, custom_hooks=quantizer.custom_hooks()) self.latency_count = calc_forward_latency(model, count_input, sess_config) logging.info("after quant model glops=%sM, params=%sK, latency=%sms", self.flops_count * 1e-6, self.params_count * 1e-3, self.latency_count * 1000) self.validate()
def step(self,
         train_x=None,
         train_y=None,
         valid_x=None,
         valid_y=None,
         lr=None,
         w_optimizer=None,
         w_loss=None,
         unrolled=None,
         scope_name=None):
    """Compute one step."""
    if vega.is_torch_backend():
        self.optimizer.zero_grad()
        loss = w_loss(self.model(valid_x), valid_y)
        loss.backward()
        self.optimizer.step()
        return
    elif vega.is_tf_backend():
        self.lr = lr
        global_step = tf.compat.v1.train.get_global_step()
        loss_fn = self._init_loss()
        self.optimizer = self._init_arch_optimizer()
        logits = self.model(valid_x)
        logits = tf.cast(logits, tf.float32)
        loss = loss_fn(logits, valid_y)
        loss_scale = self.trainer_config.loss_scale \
            if self.trainer_config.amp else 1.
        arch_op = self.model.get_weight_ops()
        if loss_scale != 1:
            scaled_grad_vars = self.optimizer.compute_gradients(
                loss * loss_scale, var_list=arch_op)
            unscaled_grad_vars = [(grad / loss_scale, var)
                                  for grad, var in scaled_grad_vars]
            minimize_op = self.optimizer.apply_gradients(
                unscaled_grad_vars, global_step)
        else:
            grad_vars = self.optimizer.compute_gradients(loss,
                                                         var_list=arch_op)
            minimize_op = self.optimizer.apply_gradients(grad_vars,
                                                         global_step)
        return minimize_op
def get_named_modules(layer):
    """Get named modules."""
    if vega.is_tf_backend():
        return [(op.name, op) for op in layer]
    elif vega.is_torch_backend():
        return layer.named_modules()
    elif vega.is_ms_backend():
        return layer._children_scope_recursive()
def _infer(args, loader, model=None):
    """Choose backend."""
    if vega.is_torch_backend():
        return _infer_pytorch(model, loader)
    elif vega.is_tf_backend():
        return _infer_tf(args, model, loader)
    elif vega.is_ms_backend():
        return _infer_ms(args, model, loader)
def _init_tf_estimator(self):
    """Init tensorflow estimator."""
    if not vega.is_tf_backend():
        return
    sess_config = self._init_session_config()
    if vega.is_gpu_device():
        self._init_gpu_estimator(sess_config)
    elif vega.is_npu_device():
        self._init_npu_estimator(sess_config)
def get_shape(layer):
    """Get weight shape."""
    if vega.is_tf_backend():
        return layer.get_shape()
    elif vega.is_torch_backend():
        return layer.weight.data.shape
    elif vega.is_ms_backend():
        para_name = list(layer._params)[0]
        return getattr(layer, para_name).default_input.shape
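# Usage sketch (an assumption): under the PyTorch backend, get_shape expects
# a module with a .weight attribute, e.g. a convolution layer.
import torch.nn as nn

conv = nn.Conv2d(3, 16, kernel_size=3)
print(get_shape(conv))  # torch.Size([16, 3, 3, 3])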
def before_train(self, logs=None): """Be called before the training process.""" self.input = None self.gflops = None self.kparams = None self.calc_params_each_epoch = self.trainer.config.calc_params_each_epoch if vega.is_tf_backend(): data_iter = self.trainer.valid_input_fn().make_one_shot_iterator() input_data, _ = data_iter.get_next() self.input = input_data[:1]
def Adapter(dataset):
    """Adapter of dataset."""
    if vega.is_torch_backend():
        from .pytorch.adapter import TorchAdapter as Adapter
    elif vega.is_tf_backend():
        from .tensorflow.adapter import TfAdapter as Adapter
    elif vega.is_ms_backend():
        from .mindspore.adapter import MsAdapter as Adapter
    else:
        raise ValueError("Backend type must be torch, tf or ms.")
    return Adapter(dataset)
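# Usage sketch (an assumption; the registry call and 'Cifar10' name are
# illustrative, not confirmed by this section): once a backend is selected,
# a vega dataset is wrapped by the matching backend adapter.
import vega
from vega.common import ClassFactory, ClassType

vega.set_backend('pytorch')
dataset = ClassFactory.get_cls(ClassType.DATASET, 'Cifar10')()
loader = Adapter(dataset).loader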
def _init_metrics(self, metrics=None):
    """Init metrics."""
    if metrics is not None:
        return metrics
    else:
        if vega.is_torch_backend():
            from vega.metrics.pytorch.metrics import Metrics
        elif vega.is_tf_backend():
            from vega.metrics.tensorflow.metrics import Metrics
        elif vega.is_ms_backend():
            from vega.metrics.mindspore.metrics import Metrics
        return Metrics()
def set_distributed(cls, optimizer, model=None):
    """Set distributed optimizer."""
    if vega.is_torch_backend():
        optimizer = hvd.DistributedOptimizer(
            optimizer,
            named_parameters=model.named_parameters(),
            compression=hvd.Compression.none)
    elif vega.is_tf_backend():
        optim_class = hvd.DistributedOptimizer \
            if vega.is_gpu_device() else NPUDistributedOptimizer
        optimizer = dynamic_distributed_optimizer(optim_class, optimizer)
    return optimizer
def _calc_forward_latency_davinci(model,
                                  input,
                                  sess_config=None,
                                  num=10,
                                  evaluate_config=None):
    """Model forward latency calculation.

    :param model: network model
    :type model: torch or tf module
    :param input: input tensor
    :type input: Tensor of torch or tf
    :param num: forward number
    :type num: int
    :param evaluate_config: some config for evaluate in davinci
    :type evaluate_config: dict
    :return: forward latency
    :rtype: float
    """
    from vega.evaluator.tools.evaluate_davinci_bolt import evaluate
    from vega.common.task_ops import TaskOps
    # backend = evaluate_config.get("backend")
    hardware = evaluate_config.get("hardware")
    remote_host = evaluate_config.get("remote_host")
    worker_path = TaskOps().local_base_path
    save_data_file = os.path.join(worker_path, "input.bin")

    latency = 0.
    now_time = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')
    job_id = "pre_evaluate_" + now_time
    logging.info("The job id of evaluate service is {}.".format(job_id))
    if vega.is_torch_backend():
        import torch
        input_shape = input.shape
        if torch.is_tensor(input):
            input = input.cpu().numpy()
        input.tofile(save_data_file)
        for index in range(num):
            reuse_model = False if index == 0 else True
            results = evaluate("pytorch", hardware, remote_host, model, None,
                               save_data_file, input_shape, reuse_model,
                               job_id)
            # builtin float: np.float was removed in NumPy 1.24
            latency += float(results.get("latency"))
    elif vega.is_tf_backend():
        input_shape = input.shape.as_list()
        test_data = np.random.random(input_shape).astype(np.float32)
        test_data.tofile(save_data_file)
        for index in range(num):
            reuse_model = False if index == 0 else True
            results = evaluate("tensorflow", hardware, remote_host, model,
                               None, save_data_file, input_shape, reuse_model,
                               job_id)
            latency += float(results.get("latency"))
    return latency / num
def __init__(self, type_dict, params_dict):
    """Init config backend mapping."""
    self.type_mapping_dict = copy.deepcopy(type_dict)
    self.params_mapping_dict = copy.deepcopy(params_dict)
    self.backend_type = None
    if vega.is_torch_backend():
        self.backend_type = 'torch'
    elif vega.is_tf_backend():
        self.backend_type = 'tf'
    elif vega.is_ms_backend():
        self.backend_type = 'ms'
    else:
        raise ValueError('Backend type must be torch, tf or ms.')
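# Illustrative mapping (hypothetical names, assumed structure): type_dict maps
# a unified class name to its per-backend spelling, and params_dict does the
# same for each parameter.
type_dict = {
    'SGD': {'torch': 'SGD', 'tf': 'MomentumOptimizer', 'ms': 'Momentum'},
}
params_dict = {
    'lr': {'torch': 'lr', 'tf': 'learning_rate', 'ms': 'learning_rate'},
}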
def _to_tensor(data):
    """Change data to tensor."""
    if vega.is_torch_backend():
        import torch
        data = torch.tensor(data)
        # `args` is resolved from the enclosing module scope.
        if args.device == "GPU":
            return data.cuda()
        else:
            return data
    elif vega.is_tf_backend():
        import tensorflow as tf
        data = tf.convert_to_tensor(data)
        return data
def _get_arch_weights(self):
    """Get architecture weights."""
    if vega.is_torch_backend():
        arch_weights = self.model.arch_weights
    elif vega.is_tf_backend():
        sess_config = self.trainer._init_session_config()
        with tf.Session(config=sess_config) as sess:
            # tf.reset_default_graph()
            checkpoint_file = tf.train.latest_checkpoint(
                self.trainer.get_local_worker_path())
            saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_file))
            saver.restore(sess, checkpoint_file)
            arch_weights = self.model.arch_weights
            arch_weights = [weight.eval() for weight in arch_weights]
    return arch_weights
def _load_pretrained_model(self):
    """Load pretrained model weights if configured."""
    if self.model is None:
        return
    if self.config.pretrained_model_file is not None:
        model_file = self.config.pretrained_model_file
        model_file = os.path.abspath(model_file)
        if vega.is_torch_backend():
            ckpt = torch.load(model_file)
            self.model.load_state_dict(ckpt)
        elif vega.is_tf_backend():
            model_folder = os.path.dirname(model_file)
            FileOps.copy_folder(model_folder, self.get_local_worker_path())
    return
def before_train(self, logs=None): """Be called before the training process.""" self.config = self.trainer.config if vega.is_torch_backend(): count_input = torch.FloatTensor(1, 3, 192, 192).cuda() elif vega.is_tf_backend(): tf.reset_default_graph() count_input = tf.random_uniform([1, 192, 192, 3], dtype=tf.float32) flops_count, params_count = calc_model_flops_params(self.trainer.model, count_input) self.flops_count, self.params_count = flops_count * 1e-9, params_count * 1e-3 logger.info("Flops: {:.2f} G, Params: {:.1f} K".format(self.flops_count, self.params_count)) if self.flops_count > self.config.flops_limit: logger.info("Flop too large!") self.trainer.skip_train = True
def calc_model_flops_params(model, input, custom_hooks=None, verbose=False):
    """Pytorch model flops and parameters calculation.

    :param model: pytorch model
    :type model: torch.nn.Module
    :param input: pytorch input tensor
    :type input: torch.Tensor
    :param custom_hooks: hooks defined by outside customer
    :type custom_hooks: dict or None
    :param verbose: whether to print op type which not in collection
    :type verbose: bool, default False
    :return: flops and params
    :rtype: float, float
    """
    try:
        _model = deepcopy(model)
    except Exception:
        _model = model
    if vega.is_torch_backend():
        from thop import profile
        if custom_hooks is None:
            custom_hooks = {}
        custom_hooks = add_new_hooks(custom_hooks)
        inputs = (input, )
        flops, params = profile(_model, inputs, custom_hooks, verbose)
        del _model
    elif vega.is_tf_backend():
        import tensorflow.compat.v1 as tf
        with tf.Graph().as_default() as graph:
            dummy_input = tf.placeholder(dtype=tf.float32,
                                         shape=input.shape.as_list())
            _model.training = False
            _model(dummy_input)
            opts = tf.profiler.ProfileOptionBuilder.float_operation()
            flops = tf.profiler.profile(graph, cmd='op',
                                        options=opts).total_float_ops
            opts = tf.profiler.ProfileOptionBuilder \
                .trainable_variables_parameter()
            params = tf.profiler.profile(graph, cmd='op',
                                         options=opts).total_parameters
            # halve the profiler count to report multiply-accumulate ops
            flops *= 0.5
        del _model
    elif vega.is_ms_backend():
        total_params = 0
        for param in model.trainable_params():
            total_params += np.prod(param.shape)
        params = total_params
        # TODO: flops calculation is not yet implemented for MindSpore.
        flops = 0
    return flops, params
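# Usage sketch (an assumption; requires torchvision and the PyTorch backend
# selected in vega): measuring a standard ResNet-18 on a 224x224 input, for
# which roughly 1.8G multiply-accumulates and 11.7M parameters are typical.
import torch
from torchvision.models import resnet18

model = resnet18()
count_input = torch.randn(1, 3, 224, 224)
flops, params = calc_model_flops_params(model, count_input)
print("flops={:.2f}G params={:.2f}M".format(flops * 1e-9, params * 1e-6))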
def before_train(self, logs=None): """Be called before the train process.""" self.config = self.trainer.config self.device = self.trainer.config.device self.base_net_desc = self.trainer.config.codec if vega.is_torch_backend(): self.trainer.model._init_weights() count_input = torch.FloatTensor(1, 3, 32, 32).to(self.device) elif vega.is_tf_backend(): tf.reset_default_graph() count_input = tf.random_uniform([1, 32, 32, 3], dtype=tf.float32) self.flops_count, self.params_count = calc_model_flops_params( self.trainer.model, count_input) self.validate() self.trainer.model = self._generate_init_model(self.trainer.model)
def _generate_init_model(self, model_prune):
    """Generate init model by loading pretrained model.

    :param model_prune: searched pruned model
    :type model_prune: torch.nn.Module
    :return: initial model after loading pretrained model
    :rtype: torch.nn.Module
    """
    model_init = self._new_model_init(model_prune)
    chn_node_mask = self._init_chn_node_mask(model_prune)
    if vega.is_torch_backend():
        return self._load_torch_model(model_prune, model_init, chn_node_mask)
    elif vega.is_tf_backend():
        return self._load_tf_model(model_prune, model_init, chn_node_mask)
def is_filtered(self, desc=None):
    """Filter function of latency."""
    if self.max_latency is None:
        return False
    model, count_input = self.get_model_input(desc)
    trainer = ClassFactory.get_cls(ClassType.TRAINER)(model_desc=desc)
    sess_config = trainer._init_session_config() \
        if vega.is_tf_backend() else None
    latency = calc_forward_latency(model, count_input, sess_config)
    logging.info('Sampled model\'s latency: {}ms'.format(latency))
    if latency > self.max_latency:
        logging.info('The latency is out of range. Skip this network.')
        return True
    else:
        return False
def before_train(self, logs=None): """Be called before the training process.""" self.config = self.trainer.config if vega.is_torch_backend(): count_input = torch.FloatTensor(1, 3, 192, 192).cuda() elif vega.is_tf_backend(): tf.compat.v1.reset_default_graph() count_input = tf.random.uniform([1, 192, 192, 3], dtype=tf.float32) elif vega.is_ms_backend(): count_input = mindspore.Tensor( np.random.randn(1, 3, 192, 192).astype(np.float32)) flops_count, params_count = calc_model_flops_params( self.trainer.model, count_input) self.flops_count, self.params_count = flops_count * 1e-9, params_count * 1e-3 logger.info("Flops: {:.2f} G, Params: {:.1f} K".format( self.flops_count, self.params_count))
def _train_epoch(self):
    """Train one epoch."""
    if vega.is_torch_backend():
        self.model.train()
        for batch_index, batch in enumerate(self.train_loader):
            batch = self.make_batch(batch)
            batch_logs = {'train_batch': batch}
            self.callbacks.before_train_step(batch_index, batch_logs)
            train_batch_output = self.train_step(batch)
            batch_logs.update(train_batch_output)
            if self.config.is_detection_trainer:
                batch_logs.update({'is_detection_trainer': True})
            self.callbacks.after_train_step(batch_index, batch_logs)
    elif vega.is_tf_backend():
        self.estimator.train(input_fn=self.train_input_fn,
                             steps=len(self.train_loader),
                             hooks=self._init_logging_hook())