def __init__(self):
    self.name = ''
    self.data_format = General.data_format
    self._modules = Config()
    self._parameters = OrderedDict()
    self._weights_buffer = OrderedDict()
    self._init_configs()

def __init__(self):
    self.parent_scope_name = ''
    self._scope_name = ''
    self._modules = Config()
    self._training = True
    self.enable_scope_name = enable_scope_name
    self.data_format = General.data_format

def _set_startup(args):
    if args.startup in ['benchmark', 'b']:
        cfg = Config(args.config_file)
        config = deepcopy(cfg)
        if 'benchmark' in cfg.keys():
            benchmark_config = cfg.pop('benchmark')
            config = update_dict(benchmark_config, cfg)
    else:
        config = Config(args.config_file)
    return config

def __init__(self):
    self.parent_scope_name = ''
    self._scope_name = ''
    self._modules = Config()
    self._training = True
    self.enable_scope_name = enable_scope_name
    self.data_format = General.data_format
    self.pretrained_model_file = None
    self._is_load_pretrained = False
    self.load_pretrained_type = None
    self._trainable = True
    self.pretrained_prefix = None

def __init__(self, config=None):
    """Initialize."""
    self.is_multi_opt = False
    if config is not None:
        self.config = Config(config)
    raw_config = self.config.to_dict()
    raw_config.type = self.config.type
    map_dict = OptimMappingDict
    self.map_config = ConfigBackendMapping(
        map_dict.type_mapping_dict, map_dict.params_mapping_dict).backend_mapping(raw_config)
    self.optim_cls = ClassFactory.get_cls(ClassType.OPTIMIZER, self.map_config.type)

def __init__(self, **desc):
    """Initialize."""
    super(SimpleCnn, self).__init__()
    desc = Config(**desc)
    self.num_class = desc.num_class
    self.fp16 = desc.get('fp16', False)
    self.channels = desc.channels
    self.conv1 = ops.Conv2d(3, 32, padding=1, kernel_size=3)
    self.pool1 = ops.MaxPool2d(2, stride=2)
    self.blocks = self._blocks(self.channels, desc.blocks)
    self.pool2 = ops.MaxPool2d(2, stride=2)
    self.conv2 = ops.Conv2d(self.channels, 64, padding=1, kernel_size=3)
    self.global_conv = ops.Conv2d(64, 64, kernel_size=8)
    self.view = ops.View()
    self.fc = ops.Linear(64, self.num_class)

def __init__(self, search_space=None, **kwargs):
    super(SpNasCodec, self).__init__(search_space, **kwargs)
    config_template_file = search_space.config_template_file
    assert config_template_file is not None
    self.config_template = Config(config_template_file)
    if 'epoch' in search_space.keys():
        self.config_template['total_epochs'] = search_space.epoch

def _parse_args(sections, desc):
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument("-backend", "--general.backend", default="pytorch", type=str,
                        help="pytorch|tensorflow|mindspore")
    if "cluster" in sections:
        parser.add_argument("-devices_per_trainer", "--general.worker.devices_per_trainer",
                            default=None, type=int)
        parser.add_argument("-master_ip", "--general.cluster.master_ip", default=None, type=str)
        parser.add_argument("-listen_port", "--general.cluster.listen_port", default=8000, type=int)
        parser.add_argument("-slaves", "--general.cluster.slaves", default=[], action='store',
                            dest='general.cluster.slaves', type=str, nargs='*',
                            help="slave IP list")
    parser.add_argument("-dataset", "--dataset.type", required=True, type=str, help="dataset name.")
    parser.add_argument("-data_path", "--dataset.common.data_path", type=str, help="dataset path.")
    parser.add_argument("-batch_size", "--dataset.common.batch_size", default=256, type=int)
    if "model" in sections:
        parser.add_argument("-model_desc", "--model.model_desc", type=str)
        parser.add_argument("-model_file", "--model.pretrained_model_file", type=str)
    if "trainer" in sections:
        parser.add_argument("-epochs", "--trainer.epochs", type=int)
    if "fine_tune" in sections:
        parser.add_argument("-task_type", "--task_type", default="classification", type=str,
                            help="classification|detection|segmentation|super_resolution")
        parser.add_argument("-num_classes", "--trainer.num_classes", type=int)
    parser.add_argument("-evaluator", "--evaluator", default=[], action='store', dest='evaluator',
                        type=str, nargs='*',
                        help="evaluator list, eg. -evaluator GpuEvaluator DavinciMobileEvaluator")
    args = vars(parser.parse_args())
    args = {key: value for key, value in args.items() if args[key]}
    tree = Config(build_tree(args))
    return tree

def _set_config(args, step_name, step_type):
    """Fully train."""
    # general
    General.step_name = step_name
    if hasattr(args, "general"):
        General.from_json(args.general)
    # pipeline
    PipelineConfig.steps = [step_name]
    # pipestep
    PipeStepConfig.type = step_type
    # model
    if hasattr(args, "model"):
        if hasattr(args.model, "model_desc"):
            args.model.model_desc = Config(args.model.model_desc)
        PipeStepConfig.model.from_json(args.model)
    # dataset
    if hasattr(args, "dataset"):
        PipeStepConfig.dataset.from_json(args.dataset)
    # trainer
    if hasattr(args, "trainer"):
        TrainerConfig.from_json(args.trainer)
    # evaluator
    if hasattr(args, "evaluator"):
        # PipeStepConfig.evaluator._type_name = args.evaluator
        if "GpuEvaluator" in args.evaluator:
            PipeStepConfig.evaluator_enable = True
            PipeStepConfig.evaluator.gpu_evaluator_enable = True
        if "DavinciMobileEvaluator" in args.evaluator:
            PipeStepConfig.evaluator_enable = True
            PipeStepConfig.evaluator.davinci_mobile_evaluator_enable = True

def _create_examples(self, lines, set_type):
    """Create examples for the training, dev and test sets."""
    examples = []
    for (i, line) in enumerate(lines):
        if i == 0:
            continue
        guid = "%s-%s" % (set_type, i)
        text_a = line[3]
        text_b = line[4]
        label = None if set_type == "test" else line[0]
        examples.append(
            Config(dict(guid=guid, text_a=text_a, text_b=text_b, label=label)))
    return examples

def __new__(cls, *args, **kwargs):
    """Record params."""
    desc = {}
    params_sig = sig(cls.__init__).parameters
    param_names = list(params_sig.keys())
    if len(param_names) > len(args):
        # not dynamic parameter for connections
        for idx, arg in enumerate(args):
            arg_name = param_names[idx + 1]
            desc[arg_name] = arg
    if kwargs:
        desc.update(kwargs)
    instance = super(Serializable, cls).__new__(cls)
    instance._deep_level = 0
    instance._target_level = None
    instance.desc = Config(desc)
    return instance

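# A minimal usage sketch (assumptions: `Serializable` is the class owning this
# __new__, `sig` is `inspect.signature`, and the subclass below is hypothetical,
# not part of the source). Positional and keyword constructor arguments are
# recorded into `instance.desc` as a Config before __init__ runs, so the object
# carries its own description for later serialization.
#
# class Conv2dBlock(Serializable):
#     def __init__(self, in_channels, out_channels, kernel_size=3):
#         self.in_channels = in_channels
#         self.out_channels = out_channels
#         self.kernel_size = kernel_size
#
# block = Conv2dBlock(3, 32, kernel_size=5)
# print(block.desc)   # -> {'in_channels': 3, 'out_channels': 32, 'kernel_size': 5}
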
class LrScheduler(object):
    """Register and call LrScheduler class."""

    config = LrSchedulerConfig()

    def __init__(self, config=None):
        """Initialize."""
        # register pytorch optim as default
        if config:
            self.config = Config(config)
            raw_config = deepcopy(self.config)
        else:
            self.config = LrScheduler.config
            raw_config = self.config.to_json()
        raw_config.type = self.config.type
        map_dict = LrSchedulerMappingDict()
        self.map_config = ConfigBackendMapping(
            map_dict.type_mapping_dict, map_dict.params_mapping_dict).backend_mapping(raw_config)
        self._cls = ClassFactory.get_cls(ClassType.LR_SCHEDULER, self.map_config.type)

    def __call__(self, optimizer=None, epochs=None, steps=None):
        """Call lr scheduler class."""
        params = self.map_config.get("params", {})
        logging.debug("Call LrScheduler. name={}, params={}".format(self._cls.__name__, params))
        setattr(self._cls, "by_epoch", True)
        if hasattr(self.config, "by_epoch"):
            setattr(self._cls, "by_epoch", self.config.by_epoch)
        try:
            if params:
                return self._cls(optimizer, **params)
            else:
                return self._cls(optimizer)
        except Exception as ex:
            logging.error("Failed to call LrScheduler name={}, params={}".format(
                self._cls.__name__, params))
            raise ex

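# A minimal usage sketch (assumed config keys; the exact schema comes from
# LrSchedulerConfig / LrSchedulerMappingDict, which are not shown here). The
# wrapper maps a backend-neutral config onto the registered scheduler class and
# then instantiates it against an existing optimizer.
#
# lr_config = {'type': 'MultiStepLR', 'params': {'milestones': [30, 60], 'gamma': 0.1}}
# lr_scheduler = LrScheduler(lr_config)(optimizer=my_optimizer)
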
def _init_hps(self, hps=None):
    """Load hps from file."""
    # load config
    if hps is not None:
        pass
    elif self.config.hps_file is not None:
        desc_file = self.config.hps_file.replace("{local_base_path}", self.local_base_path)
        hps = Config(desc_file)
        if "trainer" in hps:
            if "epochs" in hps["trainer"]:
                hps["trainer"].pop("epochs")
            if "checkpoint_path" in hps["trainer"]:
                hps["trainer"].pop("checkpoint_path")
    elif self.config.hps_folder is not None:
        folder = self.config.hps_folder.replace("{local_base_path}", self.local_base_path)
        pattern = FileOps.join_path(folder, "hps_*.json")
        desc_file = glob.glob(pattern)[0]
        hps = Config(desc_file)
        if "trainer" in hps:
            if "epochs" in hps["trainer"]:
                hps["trainer"].pop("epochs")
            if "checkpoint_path" in hps["trainer"]:
                hps["trainer"].pop("checkpoint_path")
    # merge config
    if not self.hps:
        self.hps = hps
    elif hps:
        hps.from_dict(self.hps)
        self.hps = hps
    # set config
    if self.hps and self.hps.get('trainer'):
        self.config.from_dict(self.hps.get('trainer'))
        self.load_checkpoint = self.config.load_checkpoint
    self.epochs = self.config.epochs

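# Hypothetical shape of an hps file consumed by _init_hps (illustrative only;
# the real keys depend on the search space that produced the file). Note that
# "epochs" and "checkpoint_path" inside the "trainer" section are popped on load,
# so they cannot override the trainer's own settings.
#
# hps_0.json:
# {
#     "trainer": {"optimizer": {"params": {"lr": 0.01}}, "epochs": 50},
#     "dataset": {"common": {"batch_size": 128}}
# }
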
def __init__(self, config=None):
    """Initialize."""
    self.is_multi_opt = True
    if config is not None:
        self.config = Config(config)
    self._opts = OrderedDict()

class TrainerBase(DistributedWorker):
    """Trainer base class."""

    config = TrainerConfig()

    def __init__(self, model=None, id=None, hps=None, load_ckpt_flag=False,
                 model_desc=None, lazy_build=True, **kwargs):
        super().__init__()
        self.worker_type = WorkerTypes.TRAINER
        TrainerBase.__worker_id__ += 1
        if id is not None:
            self._worker_id = id
        else:
            self._worker_id = TrainerBase.__worker_id__
        # Data member list of Trainer
        self.is_chief = True
        self.use_cuda = self.config.cuda
        self.epochs = self.config.epochs
        self.do_validation = True
        self.auto_save_ckpt = True
        self.auto_save_perf = True
        self.skip_train = False
        self.valid_interval = self.config.valid_interval
        self.hps = hps
        self.model = model
        self.model_desc = model_desc
        self.optimizer = None
        self.lr_scheduler = None
        self.loss = None
        self.use_syncbn = self.config.syncbn
        self.use_amp = self.config.amp
        self.train_metrics = None
        self.valid_metrics = None
        self.call_metrics_on_train = self.config.call_metrics_on_train
        self.train_verbose = self.config.train_verbose
        self.valid_verbose = self.config.valid_verbose
        self.train_report_steps = self.config.train_report_steps
        self.valid_report_steps = self.config.valid_report_steps
        self.train_loader = None
        self.valid_loader = None
        self.train_step = None
        self.valid_step = None
        self.make_batch = None
        self.model_fn = None
        self.train_input_fn = None
        self.valid_input_fn = None
        self.callbacks = None
        self.performance = None
        self.runtime = None
        self.load_checkpoint = False
        self._resume_training = False
        self._start_epoch = 0
        self.visual_data = {}
        self.load_ckpt_flag = load_ckpt_flag
        self.distributed = self.config.distributed
        # Used by TimmTrainerCallbacks since it builds its trainer in
        # the before_train callback
        self.lazy_built = self.config.lazy_built
        # Indicate whether the necessary components of a trainer
        # have been built for running
        self._world_size = 1
        self._rank_id = 0
        self._local_rank_id = 0
        self.config.kwargs = kwargs
        self.checkpoint_file_name = 'checkpoint.pth'
        self.model_pickle_file_name = 'model.pkl'
        worker_path = self.get_local_worker_path()
        self.model_path = FileOps.join_path(worker_path, self.model_pickle_file_name)
        self.checkpoint_file = FileOps.join_path(worker_path, self.checkpoint_file_name)
        self.weights_file = FileOps.join_path(worker_path, "model_{}.pth".format(self.worker_id))
        self.loss_input = kwargs.get('loss_input', None)
        self.gpu_nums = kwargs.get('gpu_nums', 1)
        self.use_unsupervised_pretrain = self.config.use_unsupervised_pretrain
        if TrainerConfig.model_desc is None:
            TrainerConfig.model_desc = model_desc
        if not lazy_build:
            self.init_trainer()

    def init_trainer(self):
        """Init Trainer."""
        init_log(level=General.logger.level,
                 log_file="worker_{}.log".format(self.worker_id),
                 log_path=self.get_local_worker_path())
        self._set_default_funcs()
        self._set_condition()
        self._init_callbacks()
        self.callbacks.init_trainer()
        self.init_train_op()

    def train_process(self):
        """Whole train process of the TrainWorker specified in config.

        After training, the model and validation results are saved to
        local_worker_path and s3_path.
        """
        init_log(level=General.logger.level,
                 log_file="worker_{}.log".format(self.worker_id),
                 log_path=self.local_log_path)
        self._set_default_funcs()
        self._set_condition()
        self._init_callbacks()
        self.callbacks.init_trainer()
        if not self.lazy_built:
            self.build()
        self._train_loop()

    def build(self):
        """Build the trainer by assembling the necessary components."""
        logging.debug("Trainer Config: {}".format(self.config))
        self._init_hps()
        self.do_validation = self.config.with_valid
        self.use_syncbn = self.config.syncbn
        if self.use_syncbn and zeus.is_torch_backend():
            import apex
            self.model = apex.parallel.convert_syncbn_model(self.model)
        self.train_loader = self._init_dataloader(mode='train')
        self.valid_loader = self._init_dataloader(mode='val')
        self.batch_num_train = len(self.train_loader)
        self.batch_num_valid = len(self.valid_loader)

    def train(self, inputs, labels):
        """Train model."""
        pass

    def predict(self, input):
        """Inference model."""
        pass

    def save(self, file_name):
        """Save model."""
        pass

    def load(self, model_name, by_name):
        """Load model."""
        pass

    def set_weights(self, weights):
        """Set weight with memory tensor."""
        pass

    def get_weights(self):
        """Get the weights."""
        pass

    def init_trainer_op(self):
        """Init Train Op."""
        pass

    def _train_epoch(self):
        pass

    def _valid_epoch(self):
        pass

    def _set_default_funcs(self):
        pass

    def _set_condition(self):
        pass

    def _init_tf_estimator(self):
        pass

    def _init_horovod_setting(self):
        """Init horovod setting."""
        self.is_chief = True

    def _init_hps(self, hps=None):
        """Load hps from file."""
        if hps is not None:
            self.hps = hps
        elif self.config.hps_file is not None:
            desc_file = self.config.hps_file.replace("{local_base_path}", self.local_base_path)
            self.hps = Config(desc_file)
        elif self.config.hps_folder is not None:
            folder = self.config.hps_folder.replace("{local_base_path}", self.local_base_path)
            pattern = FileOps.join_path(folder, "desc_*.json")
            desc_file = glob.glob(pattern)[0]
            self.hps = Config(desc_file)
        if self.hps and self.hps.get('trainer'):
            self.config.from_json(self.hps.get('trainer'))
            self.load_checkpoint = self.config.load_checkpoint
        self.epochs = self.config.epochs

    def _init_minimize_op(self, loss, global_step, var_list=None):
        """Init loss minimize operation, include loss scale method."""
        loss_scale = self.config.loss_scale if self.use_amp else 1.
        if loss_scale != 1:
            scaled_grad_vars = self.optimizer.compute_gradients(
                loss * loss_scale, var_list=var_list)
            unscaled_grad_vars = []
            for grad, var in scaled_grad_vars:
                unscaled_grad_vars.append(
                    (grad, var) if grad is None else (grad / loss_scale, var))
            minimize_op = self.optimizer.apply_gradients(unscaled_grad_vars, global_step)
        else:
            grad_vars = self.optimizer.compute_gradients(loss, var_list=var_list)
            minimize_op = self.optimizer.apply_gradients(grad_vars, global_step)
        return minimize_op

    def _init_metrics(self, metrics=None):
        """Init metrics."""
        if metrics is not None:
            return metrics
        else:
            if zeus.is_torch_backend():
                from zeus.metrics.pytorch.metrics import Metrics
            elif zeus.is_tf_backend():
                from zeus.metrics.tensorflow.metrics import Metrics
            elif zeus.is_ms_backend():
                from zeus.metrics.mindspore.metrics import Metrics
            return Metrics()

    def _init_dataloader(self, mode, loader=None, transforms=None):
        """Init dataloader."""
        if loader is not None:
            return loader
        if mode == "train" and self.hps is not None and self.hps.get("dataset") is not None:
            if self.hps.get("dataset") and self.hps.get("dataset").get('type'):
                dataset_cls = ClassFactory.get_cls(
                    ClassType.DATASET, self.hps.get("dataset").get('type'))
            else:
                dataset_cls = ClassFactory.get_cls(ClassType.DATASET)
            dataset = dataset_cls(mode=mode, hps=self.hps.get("dataset"))
        elif self.hps:
            if self.hps.get("dataset") and self.hps.get("dataset").get('type'):
                dataset_cls = ClassFactory.get_cls(
                    ClassType.DATASET, self.hps.get("dataset").get('type'))
                dataset = dataset_cls(mode=mode, hps=self.hps.get("dataset"))
            else:
                dataset_cls = ClassFactory.get_cls(ClassType.DATASET)
                dataset = dataset_cls(mode=mode)
        else:
            dataset_cls = ClassFactory.get_cls(ClassType.DATASET)
            dataset = dataset_cls(mode=mode)
        if transforms is not None:
            dataset.transforms = transforms
        if self.distributed and mode == "train":
            dataset.set_distributed(self._world_size, self._rank_id)
        # adapt the dataset to specific backend
        dataloader = Adapter(dataset).loader
        return dataloader

    def _train_loop(self):
        """Do the training with data, callbacks and step functions etc."""
        # Allow user to build trainer in before_train() callback, but they
        # should set lazy_built in configuration file to True
        self.callbacks.before_train()
        if self.skip_train:
            return
        if self.use_unsupervised_pretrain and zeus.is_torch_backend():
            from .trainer.simclr.transforms import TransformsSimCLR
            from .trainer.simclr.train import simclr_train
            train_loader = self._init_dataloader(mode="train", transforms=TransformsSimCLR())
            self.model = simclr_train(self.model, train_loader)
        repeat_time = 1 if zeus.is_ms_backend() else self.epochs
        for epoch in range(self._start_epoch, repeat_time):
            epoch_logs = {'train_num_batches': self.batch_num_train}
            if self.do_validation:
                epoch_logs.update({'valid_num_batches': self.batch_num_valid})
            self.callbacks.before_epoch(epoch, epoch_logs)
            self._train_epoch()
            if self.do_validation and self._should_run_validation(epoch):
                self._valid_epoch()
            self.callbacks.after_epoch(epoch)
        self.callbacks.after_train()
        if self.distributed:
            self._shutdown_distributed()

    def _should_run_validation(self, epoch):
        # A valid_interval of zero means the trainer's _valid_loop is not run;
        # the user may provide _valid_loop in other callbacks instead.
        if self.valid_interval == 0:
            return False
        else:
            return epoch % self.valid_interval == 0 or (epoch + 1) == self.epochs

    def _init_callbacks(self):
        disables = []
        customs = self.config.callbacks or []
        if customs and not isinstance(customs, list):
            customs = [customs]
        if not self.config.model_statistics:
            disables.append('ModelStatistics')
        self.callbacks = CallbackList(customs, disables)
        self.callbacks.set_trainer(self)

    def _metric_average(self, val, name):
        """Do metric average.

        :param val: input value
        :param name: metric name
        :return:
        """
        import torch
        import horovod.torch as hvd
        tensor = torch.tensor(val)
        avg_tensor = hvd.allreduce(tensor, name=name)
        return avg_tensor.item()

    def _backup(self):
        """Backup result worker folder."""
        if self.need_backup is True and self.backup_base_path is not None:
            backup_worker_path = FileOps.join_path(self.backup_base_path, self.get_worker_subpath())
            FileOps.copy_folder(
                self.get_local_worker_path(self.step_name, self.worker_id), backup_worker_path)

    def _shutdown_distributed(self):
        if zeus.is_npu_device() and self.distributed:
            self.sess.run(self.npu_shutdown)
            self.sess.close()

def convert_examples_to_features(self, examples, label_list, max_seq_length, tokenizer):
    """Load a data file into a list of `InputBatch`s."""
    label_map = {label: i for i, label in enumerate(label_list)}
    features = []
    for (ex_index, example) in enumerate(examples):
        tokens_a = tokenizer.tokenize(example.text_a)
        tokens_b = None
        if example.text_b:
            tokens_b = tokenizer.tokenize(example.text_b)
            # Modifies `tokens_a` and `tokens_b` in place so that the total
            # length is less than the specified length.
            # Account for [CLS], [SEP], [SEP] with "- 3"
            _truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
        else:
            # Account for [CLS] and [SEP] with "- 2"
            if len(tokens_a) > max_seq_length - 2:
                tokens_a = tokens_a[:(max_seq_length - 2)]
        # The convention in BERT is:
        # (a) For sequence pairs:
        #  tokens:   [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
        #  type_ids: 0     0  0    0    0     0       0 0     1  1  1  1   1 1
        # (b) For single sequences:
        #  tokens:   [CLS] the dog is hairy . [SEP]
        #  type_ids: 0     0   0   0  0     0 0
        #
        # Where "type_ids" are used to indicate whether this is the first
        # sequence or the second sequence. The embedding vectors for `type=0` and
        # `type=1` were learned during pre-training and are added to the wordpiece
        # embedding vector (and position vector). This is not *strictly* necessary
        # since the [SEP] token unambiguously separates the sequences, but it makes
        # it easier for the model to learn the concept of sequences.
        #
        # For classification tasks, the first vector (corresponding to [CLS]) is
        # used as the "sentence vector". Note that this only makes sense because
        # the entire model is fine-tuned.
        tokens = ["[CLS]"] + tokens_a + ["[SEP]"]
        segment_ids = [0] * len(tokens)
        if tokens_b:
            tokens += tokens_b + ["[SEP]"]
            segment_ids += [1] * (len(tokens_b) + 1)
        input_ids = tokenizer.convert_tokens_to_ids(tokens)
        # The mask has 1 for real tokens and 0 for padding tokens. Only real
        # tokens are attended to.
        input_mask = [1] * len(input_ids)
        # Zero-pad up to the sequence length.
        padding = [0] * (max_seq_length - len(input_ids))
        input_ids += padding
        input_mask += padding
        segment_ids += padding
        assert len(input_ids) == max_seq_length
        assert len(input_mask) == max_seq_length
        assert len(segment_ids) == max_seq_length
        label_id = label_map[example.label]
        if ex_index < 5:
            logging.info("*** Example ***")
            logging.info("guid: %s" % (example.guid))
            logging.info("tokens: %s" % " ".join([str(x) for x in tokens]))
            logging.info("input_ids: %s" % " ".join([str(x) for x in input_ids]))
            logging.info("input_mask: %s" % " ".join([str(x) for x in input_mask]))
            logging.info("segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
            logging.info("label: %s (id = %d)" % (example.label, label_id))
        features.append(
            Config(dict(input_ids=input_ids,
                        input_mask=input_mask,
                        segment_ids=segment_ids,
                        label_id=label_id)))
    return features

class Module(object):
    """Base Module to adapt tf Module."""

    def __init__(self):
        self.name = ''
        self.data_format = General.data_format
        self._modules = Config()
        self._parameters = OrderedDict()
        self._weights_buffer = OrderedDict()
        self._init_configs()

    def _init_configs(self):
        self._training = True
        self._trainable = True
        self.weight_file = None
        self.from_weight_type = None
        self._is_load_pretrained = False
        self._is_adaptive_weight = False
        self.exclude_weight_prefix = None

    def add_module(self, name, model):
        """Add models into self._modules."""
        setattr(self, str(name), model)

    def build(self):
        """Build model or params."""
        pass

    def named_modules(self):
        """Return named modules."""
        self._apply_names()
        _modules = []
        for module in self.children():
            _modules.append((module.name, module))
            _modules.extend(module.named_modules())
        return _modules

    def named_children(self):
        """Return named children."""
        return [(name, module) for name, module in self._modules.items()]

    def children(self):
        """Get child models of current Module."""
        for model in self._modules.values():
            yield model

    def load_checkpoint(self, weight_file):
        """Load weight state dict from last checkpoint file."""
        if not weight_file:
            return
        logging.info("Load checkpoint from file ({}).".format(weight_file))
        # model_file = tf.train.latest_checkpoint(weight_file)
        reader = tf.train.NewCheckpointReader(weight_file)
        variables = reader.get_variable_to_shape_map()
        states = {v: reader.get_tensor(v) for v in variables}
        self.load_checkpoint_from_numpy(states)

    def load_checkpoint_from_numpy(self, states):
        """Load checkpoint from numpy."""
        states = self._exclude_checkpoint_by_prefix(states)
        for name, module in self.named_modules():
            child_state = [(k, v) for k, v in states.items() if k.startswith(module.name + '/')]
            for k, v in child_state:
                module.set_weights(k, v)

    def _exclude_checkpoint_by_prefix(self, states):
        if self.exclude_weight_prefix:
            if not isinstance(self.exclude_weight_prefix, list):
                self.exclude_weight_prefix = [self.exclude_weight_prefix]
            for prefix in self.exclude_weight_prefix:
                states = {k: v for k, v in states.items() if not k.startswith(prefix)}
        return states

    def set_weights(self, name, value):
        """Set weights into weights buffer."""
        self._weights_buffer[name] = value

    @property
    def training(self):
        """Get training flag."""
        return self._training

    @training.setter
    def training(self, value):
        """Set training flag."""
        self._training = value
        for module in self.children():
            module.training = value

    @property
    def is_adaptive_weight(self):
        """Get _is_adaptive_weight flag."""
        return self._is_adaptive_weight

    @is_adaptive_weight.setter
    def is_adaptive_weight(self, value):
        """Set _is_adaptive_weight flag."""
        self._is_adaptive_weight = value
        for module in self.children():
            module.is_adaptive_weight = value

    def freeze(self):
        """Freeze the module so it is no longer trainable."""
        self._trainable = False
        for module in self.children():
            module.freeze()

    def __setattr__(self, key, value):
        """Register child modules when they are set as attributes."""
        super().__setattr__(key, value)
        if isinstance(value, Module):
            self._modules[key] = value

    def set_parameters(self, name, value):
        """Set parameters."""
        self._parameters[name] = value
        setattr(self, name, value)
        return self.name

    def get_weights(self, name=None):
        """Get weights by name."""
        if self._weights_buffer:
            return self._weights_buffer
        return tf.get_default_graph().get_tensor_by_name('{}:0'.format(name))

    def get_all_weights(self):
        """Get all weights."""
        all_weights = OrderedDict()
        for child in self.children():
            all_weights.update(child._weights_buffer)
            if isinstance(child, Module):
                all_weights.update(child.get_all_weights())
        return all_weights

    def get_weight_ops(self, name):
        """Get weight ops."""
        all_weight = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        weight_ops = [t for t in all_weight if not t.name.startswith(name)]
        return weight_ops

    def call(self, inputs, *args, **kwargs):
        """Run child modules sequentially on the inputs."""
        output = inputs
        for model in self.children():
            output = model(output)
        return output

    def adaptive_weight(self, inputs):
        """Adaptive weight."""
        return {}

    def _apply_names(self, parent_name=''):
        """Apply name scopes."""
        for scope_name, module in self._modules.items():
            scope_name = '{}.{}'.format(parent_name, scope_name) if parent_name else scope_name
            module.name = module.name or scope_name + '/' + module.__class__.__name__
            module._apply_names(scope_name)

    def _apply_parameters(self):
        """Create tf Variables for registered parameters."""
        for name, params in self._parameters.items():
            setattr(self, name, tf.Variable(
                params, name='{}.{}'.format(self.name, name) if self.name else name))

    def __call__(self, inputs, *args, **kwargs):
        """Build and call the module."""
        self.build()
        self._apply_parameters()
        self._apply_names()
        for module in self.children():
            module._is_load_pretrained = True
        out = self.call(inputs, *args, **kwargs)
        self._apply_weights(inputs)
        return out

    def _apply_weights(self, inputs):
        if not self._weights_buffer:
            return
        variables = tf.get_collection(tf.GraphKeys.VARIABLES)
        if self.is_adaptive_weight:
            self._weights_buffer.update(self.adaptive_weight(inputs))
        values = [(var, self._weights_buffer.get(var.name.replace(':0', '')))
                  for var in variables if var.name.replace(':0', '') in self._weights_buffer]
        for v, weight in values:
            v._initializer_op = state_ops.assign(v, weight)
        self._weights_buffer.clear()

    def modules(self):
        """Get the current modules."""
        if self._modules.values():
            return self._modules.values()
        else:
            return [self]

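# A minimal composition sketch (hypothetical subclass; `ops` and the import path
# for Module are assumptions, not part of the source). __setattr__ registers each
# child module in self._modules, so the default call() chains the children in
# attribute-definition order when the instance is called.
#
# class TinyNet(Module):
#     def __init__(self):
#         super().__init__()
#         self.conv = ops.Conv2d(3, 16, kernel_size=3, padding=1)
#         self.pool = ops.MaxPool2d(2, stride=2)
#
# net = TinyNet()
# names = [name for name, _ in net.named_children()]   # ['conv', 'pool']
# out = net(images)   # build(), apply names/params, then conv -> pool in order
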
class Optimizer(object):
    """Register and call Optimizer class."""

    config = OptimConfig()

    def __new__(cls, *args, **kwargs):
        """Create optimizer or multi-optimizer class."""
        if isinstance(cls.config.to_dict(), list):
            t_cls = ClassFactory.get_cls(ClassType.OPTIMIZER, 'MultiOptimizers')
            return super().__new__(t_cls)
        return super().__new__(cls)

    def __init__(self, config=None):
        """Initialize."""
        self.is_multi_opt = False
        if config is not None:
            self.config = Config(config)
        raw_config = self.config.to_dict()
        raw_config.type = self.config.type
        map_dict = OptimMappingDict
        self.map_config = ConfigBackendMapping(
            map_dict.type_mapping_dict, map_dict.params_mapping_dict).backend_mapping(raw_config)
        self.optim_cls = ClassFactory.get_cls(ClassType.OPTIMIZER, self.map_config.type)

    def __call__(self, model=None, distributed=False, **kwargs):
        """Call Optimizer class.

        :param model: model, used in torch case
        :param distributed: use distributed
        :return: optimizer
        """
        params = self.map_config.get("params", {})
        logging.debug("Call Optimizer. name={}, params={}".format(self.optim_cls.__name__, params))
        optimizer = None
        try:
            if zeus.is_torch_backend():
                learnable_params = [param for param in model.parameters() if param.requires_grad]
                optimizer = self.optim_cls(learnable_params, **params)
                if distributed:
                    optimizer = self.set_distributed(optimizer, model)
            elif zeus.is_tf_backend():
                optimizer = dynamic_optimizer(self.optim_cls, **params)
            elif zeus.is_ms_backend():
                if "dynamic_lr" in kwargs:
                    params.update({"learning_rate": kwargs["dynamic_lr"]})
                learnable_params = [param for param in model.trainable_params() if param.requires_grad]
                optimizer = self.optim_cls(learnable_params, **params)
            return optimizer
        except Exception as ex:
            logging.error("Failed to call Optimizer name={}, params={}".format(
                self.optim_cls.__name__, params))
            raise ex

    @classmethod
    def set_distributed(cls, optimizer, model=None):
        """Set distributed optimizer."""
        if zeus.is_torch_backend():
            optimizer = hvd.DistributedOptimizer(
                optimizer,
                named_parameters=model.named_parameters(),
                compression=hvd.Compression.none)
        elif zeus.is_tf_backend():
            optim_class = hvd.DistributedOptimizer if zeus.is_gpu_device() else NPUDistributedOptimizer
            optimizer = dynamic_distributed_optimizer(optim_class, optimizer)
        return optimizer

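# A minimal usage sketch (assumed config keys; the accepted "type" and "params"
# values come from OptimConfig / OptimMappingDict, which are not shown here).
# Under the PyTorch backend the wrapper collects the model's trainable parameters
# and instantiates the mapped optimizer class with the mapped params.
#
# optim_config = {'type': 'SGD', 'params': {'lr': 0.1, 'momentum': 0.9, 'weight_decay': 1e-4}}
# optimizer = Optimizer(optim_config)(model=my_model, distributed=False)
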
class Module(object):
    """Base Module to adapt tf Module."""

    data_format = 'channels_first'

    def __init__(self):
        self.parent_scope_name = ''
        self._scope_name = ''
        self._modules = Config()
        self._training = True
        self.enable_scope_name = enable_scope_name
        self.data_format = General.data_format
        self.pretrained_model_file = None
        self._is_load_pretrained = False
        self.load_pretrained_type = None
        self._trainable = True
        self.pretrained_prefix = None

    def add_module(self, name, model):
        """Add models into self._modules."""
        setattr(self, str(name), model)

    def named_modules(self):
        """Return named modules."""
        _names_modules = []
        for model in self.children():
            if isinstance(model, Module):
                _names_modules.append((model._scope_name, model))
                child_modules = model.named_modules()
                _names_modules.extend(child_modules)
        return _names_modules

    def named_children(self):
        """Return named children."""
        return [(name, module) for name, module in self._modules.items()]

    def children(self):
        """Get child models of current Module."""
        for model in self._modules.values():
            if isinstance(model, Module):
                model._scope_name = "{}.{}".format(
                    self._scope_name, model.parent_scope_name) if self._scope_name else model.parent_scope_name
            yield model

    def pretrained(self, pretrained_model_file=None):
        """Load pretrained weights."""
        if self._is_load_pretrained:
            return []
        assign_vars = []
        checkpoint_path = pretrained_model_file or self.pretrained_model_file
        if not checkpoint_path:
            return
        pretrained_prefix = self.pretrained_prefix or {self._scope_name: self._scope_name}
        if self.load_pretrained_type == 'pytorch':
            assign_vars = assign_pytorch_weights(checkpoint_path, pretrained_prefix)
        else:
            tf.train.init_from_checkpoint(checkpoint_path, pretrained_prefix)
        self._is_load_pretrained = True
        return assign_vars

    @property
    def training(self):
        """Get training flag."""
        return self._training

    @training.setter
    def training(self, value):
        """Set training flag."""
        self._training = value
        for module in self.children():
            module.training = value

    @property
    def freeze(self):
        """Get freeze flag."""
        return not self._trainable

    @freeze.setter
    def freeze(self, value):
        """Set freeze flag."""
        self._trainable = not value
        for module in self.children():
            module.freeze = value

    def __setattr__(self, key, value):
        """Register child modules and scope names when attributes are set."""
        self.__dict__[key] = value
        # super().__setattr__(key, value)
        if isinstance(value, Module):
            if self.enable_scope_name:
                value.parent_scope_name = key
            self._modules[key] = value

    def __getattribute__(self, name):
        """Get modules by name, refreshing scope names on access."""
        value = object.__getattribute__(self, name)
        if isinstance(value, Module) and self.enable_scope_name:
            value._scope_name = "{}.{}".format(
                self._scope_name, value.parent_scope_name) if self._scope_name else value.parent_scope_name
        return value

    def set_parameters(self, name, value):
        """Set parameters."""
        with tf.variable_scope('', reuse=tf.AUTO_REUSE):
            setattr(self, name, tf.get_variable(name, initializer=value))

    def get_weights(self, name):
        """Get weights by name."""
        return tf.get_default_graph().get_tensor_by_name('{}:0'.format(name))

    def get_weight_ops(self, name):
        """Get weight ops."""
        all_weight = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        weight_ops = [t for t in all_weight if not t.name.startswith(name)]
        return weight_ops

    def call(self, inputs, *args, **kwargs):
        """Run child modules sequentially on the inputs."""
        output = inputs
        for model in self.children():
            output = model(output)
        return output

    def __call__(self, inputs, *args, **kwargs):
        """Call the module's forward function."""
        return self.call(inputs, *args, **kwargs)

    def modules(self):
        """Get the current modules."""
        if self._modules.values():
            return self._modules.values()
        else:
            return [self]