def test_get_lr_parameter_with_group():
    """With per-group lr, get_lr_parameter maps each parameter to its group's lr Parameter."""
    net = LeNet5()
    conv_lr = 0.1
    default_lr = 0.3
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'lr': conv_lr},
                    {'params': no_conv_params, 'lr': default_lr}]
    opt = SGD(group_params)
    assert opt.is_group_lr is True
    for param in opt.parameters:
        lr = opt.get_lr_parameter(param)
        if 'conv' in param.name:
            cur_name = 'learning_rate_group_' + '0'
        else:
            cur_name = 'learning_rate_group_' + '1'
        assert lr.name == cur_name

    # get_lr_parameter also accepts a list of parameters.
    lr_list = opt.get_lr_parameter(conv_params)
    for lr, param in zip(lr_list, conv_params):
        assert lr.name == 'learning_rate_group_' + '0'
def test_get_lr_parameter_with_order_group():
    """With order_params, each parameter gets its own lr Parameter named 'lr_' + param.name."""
    net = LeNet5()
    conv_lr = 0.1
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'lr': conv_lr},
                    {'order_params': net.trainable_params()}]
    opt = SGD(group_params)
    assert opt.is_group_lr is True
    for param in opt.parameters:
        lr = opt.get_lr_parameter(param)
        assert lr.name == 'lr_' + param.name
def test_order_params_2():
    """Ordered groups with a dynamic (tuple) lr for fc1 and a weight decay override for conv."""
    net = LeNet5()
    conv_weight_decay = 0.01
    fc1_lr = (0.5, 0.4, 0.3)
    default_lr = 0.1
    default_wd = 0.0
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    fc1_params = list(filter(lambda x: 'fc1' in x.name, net.trainable_params()))
    group_params = [{'params': fc1_params, 'lr': fc1_lr},
                    {'params': conv_params, 'weight_decay': conv_weight_decay},
                    {'order_params': fc1_params + conv_params}]
    opt = SGD(group_params, learning_rate=default_lr, weight_decay=default_wd)
    assert opt.is_group is True
    assert opt.is_group_lr is True
    assert opt.is_group_params_ordered is True

    all_lr = opt.get_lr_parameter(fc1_params + conv_params)
    for weight_decay, decay_flags, lr, param, order_param in zip(
            opt.weight_decay, opt.decay_flags, all_lr, opt.parameters, fc1_params + conv_params):
        if 'conv' in param.name:
            # The scalar default lr is broadcast to the dynamic lr's length.
            assert np.all(lr.data.asnumpy() == Tensor(np.array([default_lr] * 3), mstype.float32).asnumpy())
            assert weight_decay == conv_weight_decay
            assert decay_flags is True
        elif 'fc1' in param.name:
            assert np.all(lr.data.asnumpy() == Tensor(fc1_lr, mstype.float32).asnumpy())
            assert weight_decay == default_wd
            assert decay_flags is False
        else:
            assert np.all(lr.data.asnumpy() == Tensor(np.array([default_lr] * 3), mstype.float32).asnumpy())
            assert weight_decay == default_wd
            assert decay_flags is False
        assert param.name == order_param.name
        if 'conv' in param.name:
            assert lr.name == 'learning_rate'
        elif 'fc1' in param.name:
            assert lr.name == 'learning_rate_group_' + '0'
def test_init(self):
    """A negative momentum must raise ValueError regardless of the learning rate."""
    with pytest.raises(ValueError):
        SGD(params, learning_rate=0.1, momentum=-0.1, dampening=0,
            weight_decay=0, nesterov=False)
    with pytest.raises(ValueError):
        SGD(params, learning_rate=0.12, momentum=-0.1, dampening=0,
            weight_decay=0, nesterov=False)
    SGD(params)
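# A minimal, hypothetical stand-in for the `params` fixture test_init relies on
# (it is defined outside this excerpt): SGD only needs an iterable of mindspore
# Parameters, so a couple of named Parameters suffice. Names and shapes here are
# illustrative assumptions, not taken from the original file.
def _make_params_sketch():
    from mindspore import Parameter
    return [Parameter(Tensor(np.ones([2, 2]).astype(np.float32)), name='w'),
            Parameter(Tensor(np.zeros([2]).astype(np.float32)), name='b')]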
def test_SGD():
    """Loss should decrease monotonically over a few steps of SGD with nesterov momentum."""
    epoch = 3
    net = NetSGD()
    learning_rate = 0.1
    momentum = 0.9
    dampening = 0.0
    weight_decay = 0.0
    nesterov = True
    loss_scale = 1.0

    optimizer = SGD(filter(lambda x: x.requires_grad, net.get_parameters()),
                    learning_rate, momentum, dampening, weight_decay, nesterov, loss_scale)
    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    net_with_criterion = WithLossCell(net, criterion)
    train_network = TrainOneStepCell(net_with_criterion, optimizer)
    train_network.set_train()
    losses = []
    for _ in range(epoch):
        data = Tensor(np.arange(0, 16).reshape(1, 1, 4, 4).astype(np.float32) * 0.01)
        label = Tensor(np.array([0]).astype(np.int32))
        loss = train_network(data, label)
        losses.append(loss.asnumpy())

    last_loss = 100.0
    for loss in losses:
        assert last_loss > loss
        last_loss = loss
    return losses
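# A plain-numpy sketch of the update rule test_SGD exercises, using the
# conventional momentum formulation (accum <- momentum * accum +
# (1 - dampening) * grad; nesterov applies grad + momentum * accum). This is a
# reference sketch under those assumptions, not the framework's kernel.
def _sgd_step_sketch(param, grad, accum, lr=0.1, momentum=0.9, dampening=0.0,
                     weight_decay=0.0, nesterov=True):
    if weight_decay > 0.0:
        grad = grad + weight_decay * param  # classic L2 term folded into the gradient
    accum = momentum * accum + (1.0 - dampening) * grad
    update = grad + momentum * accum if nesterov else accum
    return param - lr * update, accum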
def test_order_params_all_1():
    """Ordered groups covering every parameter: per-parameter lr names and group overrides."""
    net = LeNet5()
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    bias_params = list(filter(lambda x: 'bias' in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'weight_decay': 0.01},
                    {'params': bias_params, 'lr': 0.01},
                    {'order_params': net.trainable_params()}]
    opt = SGD(group_params, learning_rate=0.1, weight_decay=0.0)
    assert opt.is_group is True
    assert opt.is_group_lr is True
    assert opt.is_group_params_ordered is True
    for weight_decay, decay_flags, lr, param, order_param in zip(
            opt.weight_decay, opt.decay_flags, opt.learning_rate, opt.parameters, net.trainable_params()):
        if param in conv_params:
            assert np.all(lr.data.asnumpy() == Tensor(0.1, mstype.float32).asnumpy())
            assert weight_decay == 0.01
            assert decay_flags is True
        elif param in bias_params:
            assert np.all(lr.data.asnumpy() == Tensor(0.01, mstype.float32).asnumpy())
            assert weight_decay == 0.0
            assert decay_flags is False
        else:
            assert np.all(lr.data.asnumpy() == Tensor(0.1, mstype.float32).asnumpy())
            assert weight_decay == 0.0
            assert decay_flags is False
        assert param.name == order_param.name
        assert lr.name == 'lr_' + param.name
def test_weight_decay():
    """Per-group weight decay with ordered params; the train network should still compile."""
    inputs = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01)
    label = Tensor(np.ones([1, 10]).astype(np.float32))
    net = LeNet5()
    conv_weight_decay = 0.8
    default_weight_decay = 0.0
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
    group_params = [{'params': no_conv_params},
                    {'params': conv_params, 'weight_decay': conv_weight_decay},
                    {'order_params': net.trainable_params()}]
    net.set_train()
    loss = nn.SoftmaxCrossEntropyWithLogits()

    opt = SGD(group_params, learning_rate=0.1, weight_decay=default_weight_decay)
    assert opt.is_group is True
    assert opt.is_group_lr is False
    assert opt.is_group_params_ordered is True
    for weight_decay, decay_flags, param, order_param in zip(
            opt.weight_decay, opt.decay_flags, opt.parameters, net.trainable_params()):
        if param in conv_params:
            assert weight_decay == conv_weight_decay
            assert decay_flags is True
        else:
            assert weight_decay == default_weight_decay
            assert decay_flags is False
        assert param.name == order_param.name

    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepCell(net_with_loss, opt)
    _executor.compile(train_network, inputs, label)
def test_get_lr_parameter_with_no_group():
    """Without per-group lr, every parameter shares the single learning_rate Parameter."""
    net = LeNet5()
    conv_weight_decay = 0.8
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'weight_decay': conv_weight_decay},
                    {'params': no_conv_params}]
    opt = SGD(group_params)
    assert opt.is_group_lr is False
    for param in opt.parameters:
        lr = opt.get_lr_parameter(param)
        assert lr.name == opt.learning_rate.name

    # Non-Parameter inputs are rejected.
    params_error = [1, 2, 3]
    with pytest.raises(TypeError):
        opt.get_lr_parameter(params_error)
def test_get_order_params_with_not_include():
    """order_params that omits parameters appearing in the groups must raise ValueError."""
    net = LeNet5()
    conv_weight_decay = 0.8
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    no_conv_params = list(filter(lambda x: 'conv' not in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'weight_decay': conv_weight_decay},
                    {'order_params': no_conv_params}]
    with pytest.raises(ValueError):
        SGD(group_params)
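# Hypothetical counterpart (not part of the original suite): construction
# succeeds when order_params covers the grouped parameters, mirroring the
# ordered-group tests above.
def _order_params_with_include_sketch():
    net = LeNet5()
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'weight_decay': 0.8},
                    {'order_params': net.trainable_params()}]
    SGD(group_params)  # no ValueError: every grouped param appears in order_params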
def test_order_params_lr():
    """Ordered groups with an lr override for conv layers only."""
    net = LeNet5()
    conv_lr = 0.01
    default_lr = 0.1
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'lr': conv_lr},
                    {'order_params': net.trainable_params()}]
    opt = SGD(group_params, learning_rate=default_lr)
    assert opt.is_group is True
    assert opt.is_group_lr is True
    assert opt.is_group_params_ordered is True
    for lr, param, order_param in zip(opt.learning_rate, opt.parameters, net.trainable_params()):
        if param in conv_params:
            assert np.all(lr.data.asnumpy() == Tensor(conv_lr, mstype.float32).asnumpy())
        else:
            assert np.all(lr.data.asnumpy() == Tensor(default_lr, mstype.float32).asnumpy())
        assert param.name == order_param.name
        assert lr.name == 'lr_' + param.name
def test_order_params_weight_decay():
    """Ordered groups with only a weight decay override: lr stays a single Parameter."""
    net = LeNet5()
    conv_weight_decay = 0.01
    default_wd = 0.0
    default_lr = 0.1
    conv_params = list(filter(lambda x: 'conv' in x.name, net.trainable_params()))
    group_params = [{'params': conv_params, 'weight_decay': conv_weight_decay},
                    {'order_params': net.trainable_params()}]
    opt = SGD(group_params, learning_rate=default_lr, weight_decay=default_wd)
    assert opt.is_group is True
    assert opt.is_group_lr is False
    assert opt.is_group_params_ordered is True
    assert opt.learning_rate.name == "learning_rate"
    assert np.all(opt.learning_rate.data.asnumpy() == Tensor(default_lr, mstype.float32).asnumpy())
    for weight_decay, decay_flags, param, order_param in zip(
            opt.weight_decay, opt.decay_flags, opt.parameters, net.trainable_params()):
        if param in conv_params:
            assert weight_decay == conv_weight_decay
            assert decay_flags is True
        else:
            assert weight_decay == default_wd
            assert decay_flags is False
        assert param.name == order_param.name
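# The pattern the weight-decay assertions above rely on, stated once: a group's
# decay flag is set only when its weight_decay is positive. This relationship is
# inferred from the tests' expectations, not quoted from the optimizer's
# implementation.
def _expected_decay_flags_sketch(weight_decays):
    return tuple(wd > 0.0 for wd in weight_decays)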
def test_Sgd_init(self):
    """Passing a bare Tensor as params must raise TypeError."""
    with pytest.raises(TypeError):
        paramsTensor = Tensor(np.zeros([1, 2, 3]))
        SGD(paramsTensor)
def test_Sgd_init(self):
    """Passing None as params must raise ValueError."""
    with pytest.raises(ValueError):
        SGD(None)
def test_Sgd_init(self):
    """Passing a bare Tensor as params must raise TypeError."""
    # paramsTensor is presumably defined at module scope in this variant's
    # original file; recreated here (mirroring the variant above) so the test
    # is self-contained.
    paramsTensor = Tensor(np.zeros([1, 2, 3]))
    with pytest.raises(TypeError):
        SGD(paramsTensor)
def main():
    cfg, args = init_argument()
    loss_meter = AverageMeter('loss')

    # dataloader
    cfg.logger.info('start create dataloader')
    de_dataset, steps_per_epoch, class_num = get_de_dataset(cfg)
    cfg.steps_per_epoch = steps_per_epoch
    cfg.logger.info('step per epoch: %s', cfg.steps_per_epoch)
    de_dataloader = de_dataset.create_tuple_iterator()
    cfg.logger.info('class num original: %s', class_num)
    if class_num % 16 != 0:
        class_num = (class_num // 16 + 1) * 16
    cfg.class_num = class_num
    cfg.logger.info('change the class num to: %s', cfg.class_num)
    cfg.logger.info('end create dataloader')

    # backbone and loss
    cfg.logger.important_info('start create network')
    create_network_start = time.time()
    network = SphereNet(num_layers=cfg.net_depth, feature_dim=cfg.embedding_size,
                        shape=cfg.input_size)
    if args.device_target == 'CPU':
        head = CombineMarginFC(embbeding_size=cfg.embedding_size, classnum=cfg.class_num)
    else:
        head = CombineMarginFCFp16(embbeding_size=cfg.embedding_size, classnum=cfg.class_num)
    criterion = CrossEntropy()

    # load the pretrained model
    if os.path.isfile(cfg.pretrained):
        param_dict = load_checkpoint(cfg.pretrained)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
            else:
                param_dict_new[key] = values
        load_param_into_net(network, param_dict_new)
        cfg.logger.info('load model %s success', cfg.pretrained)

    # mixed precision training
    if args.device_target == 'CPU':
        network.add_flags_recursive(fp32=True)
        head.add_flags_recursive(fp32=True)
    else:
        network.add_flags_recursive(fp16=True)
        head.add_flags_recursive(fp16=True)
    criterion.add_flags_recursive(fp32=True)
    train_net = BuildTrainNetworkWithHead(network, head, criterion)

    # optimizer and lr scheduler
    lr = step_lr(lr=cfg.lr, epoch_size=cfg.epoch_size, steps_per_epoch=cfg.steps_per_epoch,
                 max_epoch=cfg.max_epoch, gamma=cfg.lr_gamma)
    opt = SGD(params=train_net.trainable_params(), learning_rate=lr, momentum=cfg.momentum,
              weight_decay=cfg.weight_decay, loss_scale=cfg.loss_scale)

    # package training process, adjust lr + forward + backward + optimizer
    train_net = TrainOneStepCell(train_net, opt, sens=cfg.loss_scale)

    # checkpoint save
    if cfg.local_rank == 0:
        ckpt_max_num = cfg.max_epoch * cfg.steps_per_epoch // cfg.ckpt_interval
        train_config = CheckpointConfig(save_checkpoint_steps=cfg.ckpt_interval,
                                        keep_checkpoint_max=ckpt_max_num)
        ckpt_cb = ModelCheckpoint(config=train_config, directory=cfg.outputs_dir,
                                  prefix='{}'.format(cfg.local_rank))
        cb_params = _InternalCallbackParam()
        cb_params.train_network = train_net
        cb_params.epoch_num = ckpt_max_num
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    train_net.set_train()
    t_end = time.time()
    t_epoch = time.time()
    old_progress = -1
    cfg.logger.important_info('====start train====')
    for i, total_data in enumerate(de_dataloader):
        data, gt = total_data
        data = Tensor(data)
        gt = Tensor(gt)

        loss = train_net(data, gt)
        loss_meter.update(loss.asnumpy())

        # ckpt
        if cfg.local_rank == 0:
            cb_params.cur_step_num = i + 1  # current step number
            cb_params.batch_num = i + 2
            ckpt_cb.step_end(run_context)

        # logging loss, fps, ...
        if i == 0:
            time_for_graph_compile = time.time() - create_network_start
            cfg.logger.important_info('{}, graph compile time={:.2f}s'.format(
                cfg.task, time_for_graph_compile))

        if i % cfg.log_interval == 0 and cfg.local_rank == 0:
            time_used = time.time() - t_end
            epoch = int(i / cfg.steps_per_epoch)
            fps = cfg.per_batch_size * (i - old_progress) * cfg.world_size / time_used
            cfg.logger.info('epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr={}'.format(
                epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if i % cfg.steps_per_epoch == 0 and cfg.local_rank == 0:
            epoch_time_used = time.time() - t_epoch
            epoch = int(i / cfg.steps_per_epoch)
            fps = cfg.per_batch_size * cfg.world_size * cfg.steps_per_epoch / epoch_time_used
            cfg.logger.info('=================================================')
            cfg.logger.info('epoch time: epoch[{}], iter[{}], {:.2f} imgs/sec'.format(
                epoch, i, fps))
            cfg.logger.info('=================================================')
            t_epoch = time.time()

    cfg.logger.important_info('====train end====')
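# A minimal sketch of the step-decay schedule `step_lr` is assumed to produce in
# main(): one lr value per training step (main() indexes it as lr[i]), scaled by
# `gamma` after each `epoch_size` epochs. `step_lr` itself lives outside this
# excerpt; the semantics here are an assumption, not its actual implementation.
def _step_lr_sketch(lr, epoch_size, steps_per_epoch, max_epoch, gamma):
    lr_each_step = []
    for i in range(max_epoch * steps_per_epoch):
        epoch = i // steps_per_epoch
        lr_each_step.append(lr * gamma ** (epoch // epoch_size))
    return np.array(lr_each_step).astype(np.float32)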