def test_lenet5_train_step_training_pynative():
    """Build a LeNet5 data-parallel Model under PyNative mode, then restore graph mode.

    Switches to PYNATIVE_MODE with an 8-device data-parallel context,
    constructs the Model (no training step is run), then resets the
    context back to GRAPH_MODE with default parallel settings.
    """
    context.set_context(mode=context.PYNATIVE_MODE)
    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                      device_num=8, mirror_mean=True)
    image = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01)
    target = Tensor(np.zeros([1, 10]).astype(np.float32))
    DatasetLenet(image, target, 2)
    network = LeNet5()
    loss_fn = nn.SoftmaxCrossEntropyWithLogits()
    optimizer = Momentum(network.get_parameters(), learning_rate=0.1, momentum=0.9)
    Model(network=network, loss_fn=loss_fn, optimizer=optimizer)
    # Restore the default execution and parallel configuration for later tests.
    context.set_context(mode=context.GRAPH_MODE)
    context.reset_auto_parallel_context()
def test_compile_fp16_lr_overflow_dynamic_graph():
    """Run one training step with dynamic loss scaling on an fp16 net and print the result."""
    data = Tensor(np.ones([16, 16]).astype(np.float32))
    target = Tensor(np.zeros([16, 16]).astype(np.float32))
    learning_rate = Tensor(np.ones([1], np.float32) * 0.1)
    net = NetFP16(16, 16)
    optimizer = Momentum(net.trainable_params(), learning_rate=learning_rate, momentum=0.9)
    net_with_loss = WithLossCell(net, MSELoss())
    # Dynamic scale manager supplies the update cell that adjusts the scale each step.
    update_cell = DynamicLossScaleManager().get_update_cell()
    train_network = TrainOneStepWithLossScaleCell(
        net_with_loss, optimizer, scale_update_cell=update_cell)
    train_network.set_train()
    output = train_network(data, target)
    print("the result is ", output)
def train(net, data, label):
    """Run a single training step and assert the loss equals the expected value.

    Args:
        net: network to train.
        data: input batch Tensor.
        label: sparse integer label Tensor.

    For a 10-class uniform softmax the initial cross-entropy is
    -ln(1/10) ≈ 2.302585. The original check was one-sided
    (`diff < 1e-6`), which would also pass for a loss far *below* the
    expected value; compare the absolute difference instead.
    """
    learning_rate = 0.01
    momentum = 0.9
    optimizer = Momentum(
        filter(lambda x: x.requires_grad, net.get_parameters()),
        learning_rate, momentum)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_with_criterion = WithLossCell(net, criterion)
    train_network = TrainOneStepCell(net_with_criterion, optimizer)  # optimizer
    train_network.set_train()
    res = train_network(data, label)
    print("+++++++++Loss+++++++++++++")
    print(res)
    print("+++++++++++++++++++++++++++")
    diff = res.asnumpy() - 2.302585
    # Two-sided tolerance: the loss must be *close to* 2.302585, not merely below it.
    assert np.all(np.abs(diff) < 1.e-6)
def test_compile_fp16_lr_overflow():
    """One loss-scaled step with the scaling sensitivity pinned at float32 max."""
    data = Tensor(np.ones([16, 16]).astype(np.float32))
    target = Tensor(np.zeros([16, 16]).astype(np.float32))
    sens = Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32)
    learning_rate = Tensor(np.ones([1], np.float32) * 0.1)
    net = NetFP16(16, 16)
    optimizer = Momentum(net.trainable_params(), learning_rate=learning_rate, momentum=0.9)
    train_network = TrainOneStepWithLossScaleCell(WithLossCell(net, MSELoss()), optimizer)
    train_network.set_train()
    output = train_network(data, target, sens)
    print("the result is ", output)
def test_compile_f16_model_train_fixed():
    """Train an fp16 net for two epochs using a fixed loss-scale manager."""
    types = (np.float32, np.float32)
    shapes = ((16, 16), (16, 16))
    dataset = MindDataSet(types, shapes)
    net = NetFP16(16, 16)
    net.set_train()
    model = Model(net,
                  loss_fn=MSELoss(),
                  optimizer=Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9),
                  metrics=None,
                  loss_scale_manager=FixedLossScaleManager())
    model.train(2, dataset)
def test_train_lenet():
    """Train LeNet for 100 epochs on a constant batch, collecting per-epoch losses."""
    epoch = 100
    net = LeNet()
    # Momentum is supplied as an initialized 1-element Tensor rather than a float.
    momentum = initializer(Tensor(np.array([0.9]).astype(np.float32)), [1])
    learning_rate = multisteplr(epoch, 30)
    optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                         learning_rate, momentum)
    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    train_network = TrainOneStepCell(WithLossCell(net, criterion), optimizer)  # optimizer
    train_network.set_train()
    losses = []
    for _ in range(epoch):
        batch = Tensor(np.ones([net.batch_size, 3, 32, 32]).astype(np.float32) * 0.01)
        target = Tensor(np.ones([net.batch_size]).astype(np.int32))
        losses.append(train_network(batch, target))
    print(losses)
def test_lars_float_lr():
    """Compile a training graph whose optimizer is LARS wrapping Momentum with a float lr."""
    data = Tensor(np.ones([1, 64]).astype(np.float32))
    target = Tensor(np.zeros([1, 10]).astype(np.float32))
    net = Net()
    net.set_train()
    base_opt = Momentum(net.trainable_params(), 0.1, 0.9)
    # Batch-norm parameters are excluded from both decay and LARS adaptation.
    optimizer = LARS(base_opt, epsilon=1e-08, hyperpara=0.02,
                     decay_filter=lambda x: 'bn' not in x.name,
                     lars_filter=lambda x: 'bn' not in x.name)
    net_with_loss = WithLossCell(net, nn.SoftmaxCrossEntropyWithLogits())
    train_network = TrainOneStepCell(net_with_loss, optimizer)
    _executor.compile(train_network, data, target)
def train(net, data, label):
    """Run one training step, print the loss, and return it.

    NOTE(review): this redefines `train` declared earlier in the file
    (different criterion arguments, returns the loss instead of checking
    its value); at import time this later definition wins.
    """
    learning_rate = 0.01
    momentum = 0.9
    optimizer = Momentum(
        filter(lambda x: x.requires_grad, net.get_parameters()),
        learning_rate, momentum)
    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    train_network = TrainOneStepCell(WithLossCell(net, criterion), optimizer)  # optimizer
    train_network.set_train()
    res = train_network(data, label)
    print("+++++++++Loss+++++++++++++")
    print(res)
    print("+++++++++++++++++++++++++++")
    assert res
    return res
def test_loss_scale_fp16_lr_overflow():
    """Two identical steps with scale_sense at float32 max must behave identically.

    The train cell returns a tuple whose element [0] is the loss and [1] is
    the overflow flag; both runs must agree, and the flag must be set on each.
    """
    inputs = Tensor(np.ones([16, 16]).astype(np.float32))
    label = Tensor(np.zeros([16, 16]).astype(np.float32))
    lr = Tensor(np.ones([1], np.float32) * 0.1)
    net = NetFP16(16, 16)
    net.set_train()
    loss = MSELoss()
    optimizer = Momentum(net.trainable_params(), learning_rate=lr, momentum=0.9)
    net_with_loss = WithLossCell(net, loss)
    train_network = TrainOneStepWithLossScaleCell(
        net_with_loss, optimizer,
        scale_sense=Tensor(np.full((1), np.finfo(np.float32).max), dtype=mstype.float32))
    output_1 = train_network(inputs, label)
    output_2 = train_network(inputs, label)
    assert output_1[0].asnumpy() == output_2[0].asnumpy()
    # `x == y == True` is a fragile chained comparison on numpy bools;
    # assert each flag's truthiness directly (equality is then implied).
    assert output_1[1].asnumpy()
    assert output_2[1].asnumpy()
def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path="", epoch_num=1):
    """Finetune `network` on `dataset` from a pretrained checkpoint.

    Args:
        dataset: training dataset; its size fixes steps_per_epoch.
        network: the network to finetune.
        load_checkpoint_path: pretrained checkpoint file (required).
        save_checkpoint_path: directory for saved checkpoints ("" -> default dir).
        epoch_num: number of training epochs.

    Raises:
        ValueError: if no pretrained checkpoint path is supplied.
        Exception: if optimizer_cfg.optimizer names an unsupported optimizer.
    """
    if load_checkpoint_path == "":
        raise ValueError("Pretrain model missed, finetune task must load pretrain model!")
    steps_per_epoch = dataset.get_dataset_size()
    # optimizer
    if optimizer_cfg.optimizer == 'AdamWeightDecay':
        lr_schedule = BertLearningRate(learning_rate=optimizer_cfg.AdamWeightDecay.learning_rate,
                                       end_learning_rate=optimizer_cfg.AdamWeightDecay.end_learning_rate,
                                       warmup_steps=int(steps_per_epoch * epoch_num * 0.1),
                                       decay_steps=steps_per_epoch * epoch_num,
                                       power=optimizer_cfg.AdamWeightDecay.power)
        params = network.trainable_params()
        decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params))
        other_params = list(filter(lambda x: not optimizer_cfg.AdamWeightDecay.decay_filter(x), params))
        group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay},
                        {'params': other_params, 'weight_decay': 0.0}]
        optimizer = AdamWeightDecay(group_params, lr_schedule, eps=optimizer_cfg.AdamWeightDecay.eps)
    elif optimizer_cfg.optimizer == 'Lamb':
        lr_schedule = BertLearningRate(learning_rate=optimizer_cfg.Lamb.learning_rate,
                                       end_learning_rate=optimizer_cfg.Lamb.end_learning_rate,
                                       warmup_steps=int(steps_per_epoch * epoch_num * 0.1),
                                       decay_steps=steps_per_epoch * epoch_num,
                                       power=optimizer_cfg.Lamb.power)
        optimizer = Lamb(network.trainable_params(), learning_rate=lr_schedule)
    elif optimizer_cfg.optimizer == 'Momentum':
        optimizer = Momentum(network.trainable_params(),
                             learning_rate=optimizer_cfg.Momentum.learning_rate,
                             momentum=optimizer_cfg.Momentum.momentum)
    else:
        # The message below was split across a raw newline in the source, which
        # is not a valid Python string literal; rejoined onto one line.
        raise Exception("Optimizer not supported. support: [AdamWeightDecay, Lamb, Momentum]")
    # load checkpoint into network
    ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch, keep_checkpoint_max=1)
    ckpoint_cb = ModelCheckpoint(prefix="ner",
                                 directory=None if save_checkpoint_path == "" else save_checkpoint_path,
                                 config=ckpt_config)
    param_dict = load_checkpoint(load_checkpoint_path)
    load_param_into_net(network, param_dict)
    update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32, scale_factor=2, scale_window=1000)
    netwithgrads = BertFinetuneCell(network, optimizer=optimizer, scale_update_cell=update_cell)
    model = Model(netwithgrads)
    callbacks = [TimeMonitor(dataset.get_dataset_size()), LossCallBack(dataset.get_dataset_size()), ckpoint_cb]
    model.train(epoch_num, dataset, callbacks=callbacks)
def test_trainTensor_amp(num_classes=10, epoch=18, batch_size=16):
    """Train ResNet50 under AMP O2 on GPU and check loss, overflow flag, and scale.

    The AMP train cell returns (loss, overflow_flag, loss_scale): the loss
    should fall below 1, no overflow should have occurred on the last step,
    and the loss scale should exceed 1.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU",
                        enable_mem_reuse=False, enable_dynamic_memory=False)
    net = resnet50(num_classes)
    lr = 0.1
    momentum = 0.9
    optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, momentum)
    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    train_network = amp.build_train_network(net, optimizer, criterion, level="O2")
    train_network.set_train()
    losses = []
    for _ in range(0, epoch):  # loop index was unused
        data = Tensor(np.ones([batch_size, 3, 224, 224]).astype(np.float32) * 0.01)
        label = Tensor(np.ones([batch_size]).astype(np.int32))
        losses.append(train_network(data, label))
    assert losses[-1][0].asnumpy() < 1
    # `== False` on a numpy bool is an equality anti-pattern; use `not` instead.
    assert not losses[-1][1].asnumpy()
    assert losses[-1][2].asnumpy() > 1
def test_trainTensor_big_batchSize(num_classes=10, epoch=8, batch_size=338):
    """Train ResNet50 with a large batch on constant data; final loss must be < 1."""
    net = resnet50(num_classes)
    optimizer = Momentum(
        filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    train_network = TrainOneStepCell(WithLossCell(net, criterion), optimizer)  # optimizer
    train_network.set_train()
    losses = []
    for _ in range(epoch):
        batch = Tensor(np.ones([batch_size, 3, 224, 224]).astype(np.float32) * 0.01)
        target = Tensor(np.ones([batch_size]).astype(np.int32))
        losses.append(train_network(batch, target))
    assert losses[-1].asnumpy() < 1
def test_LSTM():
    """Overfit a bidirectional LSTM sentiment net on a constant batch until loss < 0.01."""
    num_epochs = 5
    embed_size = 100
    num_hiddens = 100
    num_layers = 2
    bidirectional = True
    labels = 2
    vocab_size = 252193
    max_len = 500
    weight = np.ones((vocab_size + 1, embed_size)).astype(np.float32)
    net = SentimentNet(vocab_size=(vocab_size + 1), embed_size=embed_size,
                       num_hiddens=num_hiddens, num_layers=num_layers,
                       bidirectional=bidirectional, weight=weight,
                       labels=labels, batch_size=batch_size)
    optimizer = Momentum(
        filter(lambda x: x.requires_grad, net.get_parameters()), 0.1, 0.9)
    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    train_network = TrainOneStepCell(WithLossCell(net, criterion), optimizer)  # optimizer
    train_network.set_train()
    # A single constant batch repeated each epoch: the loss should collapse.
    features = Tensor(np.ones([64, max_len]).astype(np.int32))
    targets = Tensor(np.ones([64, ]).astype(np.int32)[0:64])
    losses = []
    for _ in range(num_epochs):
        step_loss = train_network(features, targets)
        losses.append(step_loss)
        print("loss:", step_loss.asnumpy())
    assert losses[-1].asnumpy() < 0.01
def test_momentum_compile():
    """One loss-scaled training step with a unit scale sense; prints the output."""
    data = Tensor(np.ones([15, 1]).astype(np.float32))
    target = Tensor(np.zeros([15, 1]).astype(np.float32))
    net = Net(1, 1)
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    unit_scale = Tensor(np.full((1), 1.0), dtype=mstype.float32)
    train_network = TrainOneStepWithLossScaleCell(
        WithLossCell(net, MSELoss()), optimizer, scale_sense=unit_scale)
    train_network.set_train()
    output = train_network(data, target)
    print("the result is ", output)
def test_compile_grad_error():
    """Calling the loss-scale train cell here must raise TypeError (captured and printed)."""
    data = Tensor(np.ones([16, 16]).astype(np.float32))
    target = Tensor(np.zeros([16, 16]).astype(np.float32))
    learning_rate = Tensor(np.ones([1], np.float32) * 0.1)
    net = NetFP16(16, 16)
    optimizer = Momentum(net.trainable_params(), learning_rate=learning_rate, momentum=0.9)
    update_cell = DynamicLossScaleManager().get_update_cell()
    train_network = TrainOneStepWithLossScaleCell(
        WithLossCell(net, MSELoss()), optimizer, scale_update_cell=update_cell)
    train_network.set_train()
    with pytest.raises(TypeError) as e:
        train_network(data, target)
    print(e)
def test_trainTensor_amp(num_classes=10, epoch=18, batch_size=16):
    """Train ResNet50 under AMP O2; check final (loss, overflow flag, loss scale).

    NOTE(review): this redefines `test_trainTensor_amp` from earlier in the
    file (that one also configures a GPU context); the later definition wins.
    """
    net = resnet50(num_classes)
    optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                         0.1, 0.9)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    train_network = amp.build_train_network(net, optimizer, criterion, level="O2")
    train_network.set_train()
    losses = []
    for _ in range(epoch):
        batch = Tensor(np.ones([batch_size, 3, 224, 224]).astype(np.float32) * 0.01)
        target = Tensor(np.ones([batch_size]).astype(np.int32))
        losses.append(train_network(batch, target))
    final = losses[-1]
    assert final[0].asnumpy() < 1
    assert not final[1].asnumpy()
    assert final[2].asnumpy() > 1
def test_group_lr():
    """Grouped learning rates: conv params get conv_lr, others default_lr, order preserved."""
    data = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01)
    target = Tensor(np.ones([1, 10]).astype(np.float32))
    net = LeNet5()
    conv_lr = 0.8
    default_lr = 0.1
    conv_params = [p for p in net.trainable_params() if 'conv' in p.name]
    no_conv_params = [p for p in net.trainable_params() if 'conv' not in p.name]
    group_params = [{'params': no_conv_params},
                    {'params': conv_params, 'lr': conv_lr},
                    {'order_params': net.trainable_params()}]
    net.set_train()
    opt = Momentum(group_params, learning_rate=default_lr, momentum=0.9)
    # Grouped optimizer with static per-group lr and explicit parameter ordering.
    assert opt.is_group is True
    assert opt.is_group_lr is True
    assert opt.dynamic_lr is False
    assert opt.is_group_params_ordered is True
    for lr, param, ordered in zip(opt.learning_rate, opt.parameters,
                                  net.trainable_params()):
        expected = conv_lr if 'conv' in param.name else default_lr
        assert np.all(lr.data.asnumpy() == Tensor(expected, mstype.float32).asnumpy())
        assert param.name == ordered.name
    train_network = TrainOneStepCell(
        WithLossCell(net, nn.SoftmaxCrossEntropyWithLogits()), opt)
    _executor.compile(train_network, data, target)
def test_loss_scale_fp16_model_train_overflow():
    """Two-epoch non-sink training with an aggressive dynamic loss-scale schedule."""
    dataset = MindDataSet((np.float32, np.float32), ((16, 16), (16, 16)))
    net = NetFP16(16, 16)
    net.set_train()
    # Small init scale and a 2-step window force frequent scale adjustments.
    manager = DynamicLossScaleManager(init_loss_scale=16, scale_factor=2, scale_window=2)
    model = Model(net,
                  loss_fn=MSELoss(),
                  optimizer=Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9),
                  metrics=None,
                  loss_scale_manager=manager)
    model.train(2, dataset, dataset_sink_mode=False)
def test_trainTensor(num_classes=10, epoch=15, batch_size=32):
    """Train AlexNet on constant inputs; the final loss must fall below 0.01."""
    net = AlexNet(num_classes)
    optimizer = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                         0.1, 0.9, weight_decay=0.0001)
    criterion = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    train_network = TrainOneStepCell(WithLossCell(net, criterion), optimizer)
    train_network.set_train()
    losses = []
    for _ in range(epoch):
        batch = Tensor(np.ones([batch_size, 3, 227, 227]).astype(np.float32) * 0.01)
        target = Tensor(np.ones([batch_size]).astype(np.int32))
        losses.append(train_network(batch, target).asnumpy())
    assert losses[-1] < 0.01
def do_train(dataset=None, network=None, load_checkpoint_path="", save_checkpoint_path=""):
    """Finetune `network` on `dataset` from a pretrained checkpoint.

    Args:
        dataset: training dataset; size and repeat count fix the schedule.
        network: the network to finetune.
        load_checkpoint_path: pretrained checkpoint file (required).
        save_checkpoint_path: directory for saved checkpoints.

    Raises:
        ValueError: if no pretrained checkpoint path is supplied.
        Exception: if optimizer_cfg.optimizer names an unsupported optimizer.
    """
    if load_checkpoint_path == "":
        raise ValueError("Pretrain model missed, finetune task must load pretrain model!")
    steps_per_epoch = dataset.get_dataset_size()
    epoch_num = dataset.get_repeat_count()
    # optimizer
    if optimizer_cfg.optimizer == 'AdamWeightDecayDynamicLR':
        optimizer = AdamWeightDecayDynamicLR(network.trainable_params(),
                                             decay_steps=steps_per_epoch * epoch_num,
                                             learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.learning_rate,
                                             end_learning_rate=optimizer_cfg.AdamWeightDecayDynamicLR.end_learning_rate,
                                             power=optimizer_cfg.AdamWeightDecayDynamicLR.power,
                                             warmup_steps=int(steps_per_epoch * epoch_num * 0.1),
                                             weight_decay=optimizer_cfg.AdamWeightDecayDynamicLR.weight_decay,
                                             eps=optimizer_cfg.AdamWeightDecayDynamicLR.eps)
    elif optimizer_cfg.optimizer == 'Lamb':
        optimizer = Lamb(network.trainable_params(), decay_steps=steps_per_epoch * epoch_num,
                         start_learning_rate=optimizer_cfg.Lamb.start_learning_rate,
                         end_learning_rate=optimizer_cfg.Lamb.end_learning_rate,
                         power=optimizer_cfg.Lamb.power, weight_decay=optimizer_cfg.Lamb.weight_decay,
                         warmup_steps=int(steps_per_epoch * epoch_num * 0.1),
                         decay_filter=optimizer_cfg.Lamb.decay_filter)
    elif optimizer_cfg.optimizer == 'Momentum':
        optimizer = Momentum(network.trainable_params(),
                             learning_rate=optimizer_cfg.Momentum.learning_rate,
                             momentum=optimizer_cfg.Momentum.momentum)
    else:
        # The message below was split across a raw newline in the source, which
        # is not a valid Python string literal; rejoined onto one line.
        raise Exception("Optimizer not supported. support: [AdamWeightDecayDynamicLR, Lamb, Momentum]")
    # load checkpoint into network
    ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch, keep_checkpoint_max=1)
    ckpoint_cb = ModelCheckpoint(prefix="classifier", directory=save_checkpoint_path, config=ckpt_config)
    param_dict = load_checkpoint(load_checkpoint_path)
    load_param_into_net(network, param_dict)
    update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32, scale_factor=2, scale_window=1000)
    netwithgrads = BertFinetuneCell(network, optimizer=optimizer, scale_update_cell=update_cell)
    model = Model(netwithgrads)
    callbacks = [TimeMonitor(dataset.get_dataset_size()), LossCallBack(), ckpoint_cb]
    model.train(epoch_num, dataset, callbacks=callbacks)
def test_auto_parallel_flag():
    """Training under auto_parallel must set the 'auto_parallel' flag on the train network."""
    context.set_auto_parallel_context(parallel_mode="auto_parallel", device_num=1)
    dataset = MindDataSet((np.float32, np.float32), ((16, 16), (16, 16)))
    net = NetFP16(16, 16)
    net.set_train()
    model = Model(net,
                  loss_fn=MSELoss(),
                  optimizer=Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9),
                  metrics=None,
                  loss_scale_manager=FixedLossScaleManager())
    model.train(2, dataset)
    assert model._train_network.get_flags()["auto_parallel"]
    context.reset_auto_parallel_context()
def test_parameter_update_float32():
    """Compile a train graph, then a ParameterUpdate graph for its learning-rate parameter."""
    net = Net()
    optimizer = Momentum(net.get_parameters(), 0.01, 0.001)
    train_network = TrainOneStepCell(
        WithLossCell(net, nn.SoftmaxCrossEntropyWithLogits()), optimizer)
    # compile train graph
    train_network.set_train()
    data = Tensor(np.ones([1, 64]).astype(np.float32))
    target = Tensor(np.zeros([1, 10]).astype(np.float32))
    _executor.compile(train_network, data, target)
    # construct and compile update graph
    param_lr = train_network.parameters_dict()['learning_rate']
    update_network = ParameterUpdate(param_lr)
    update_network.phase = 'update_param'
    _executor.compile(update_network, Tensor(0.0001, mstype.float32))
def auto_parallel_compile_net(mode, dev_num, strategy1=None, strategy2=None):
    """Compile Net2 under the given parallel mode and device count; return the train cell."""
    context.set_context(mode=context.GRAPH_MODE)
    context.set_auto_parallel_context(parallel_mode=mode, device_num=dev_num,
                                      enable_parallel_optimizer=True)
    data = Tensor(np.ones([32, 48]).astype(np.float32))
    target = Tensor(np.zeros([32, 16]).astype(np.float32))
    # Wrap in a virtual dataset cell before collecting trainable parameters.
    net = _VirtualDatasetCell(Net2(strategy1, strategy2))
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    train_network = TrainOneStepCell(net, optimizer)
    train_network.set_auto_parallel()
    train_network.set_train()
    _executor.compile(train_network, data, target, phase="train", auto_parallel_mode=True)
    context.reset_auto_parallel_context()
    return train_network
def test_graph_summary_sample():
    """Record the training graph and several step events into the summary directory."""
    log.debug("begin test_graph_summary_sample")
    dataset = get_dataset()
    net = Net()
    optim = Momentum(net.trainable_params(), 0.1, 0.9)
    context.set_context(mode=context.GRAPH_MODE)
    model = Model(net, loss_fn=nn.SoftmaxCrossEntropyWithLogits(),
                  optimizer=optim, metrics=None)
    with SummaryRecord(SUMMARY_DIR, file_suffix="_MS_GRAPH",
                       network=model._train_network) as test_writer:
        model.train(2, dataset)
        # step 2: create the Event
        for step in range(1, 5):
            test_writer.record(step)
        # step 3: send the event to mq
        # step 4: accept the event and write the file
    log.debug("finished test_graph_summary_sample")
def test_gpu_profiler(self):
    """Profile one epoch of LeNet5 MNIST training on GPU, then validate the output files."""
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    profiler = Profiler()
    ds_train = create_dataset(os.path.join(self.mnist_path, "train"))
    if ds_train.get_dataset_size() == 0:
        raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")
    lenet = LeNet5()
    net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    net_opt = Momentum(lenet.trainable_params(), learning_rate=0.1, momentum=0.9)
    model = Model(lenet, loss_fn=net_loss, optimizer=net_opt,
                  metrics={'acc': Accuracy()})
    model.train(1, ds_train, dataset_sink_mode=True)
    profiler.analyse()
    self._check_gpu_profiling_file()
def test_momentum_with_loss_scale_and_dynamic_lr():
    """Compile semi-auto-parallel training with loss_scale=0.5 and a per-step lr Tensor."""

    class Net(nn.Cell):
        # Sharded matmul (transposed rhs) followed by ReLU; w1 is the trainable weight.
        def __init__(self, strategy1, strategy2, weight):
            super().__init__()
            self.weight = Parameter(weight, "w1")
            self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
            self.relu = P.ReLU().set_strategy(strategy2)

        def construct(self, x):
            return self.relu(self.matmul(x, self.weight))

    context.set_auto_parallel_context(device_num=4, global_rank=0)
    strategy1 = ((2, 1), (2, 1))
    strategy2 = ((4, 1), )
    strategy3 = ((4, 1), (4, 1))
    x = Tensor(np.ones([64, 32]), dtype=ms.float32)
    weight = Tensor(np.ones([64, 32]), dtype=ms.float32)
    b = Tensor(np.ones([64, 64]), dtype=ms.float32)
    net = Net(strategy1, strategy2, weight)
    step_lrs = Tensor(np.ones([6]), dtype=ms.float32)
    optimizer = Momentum(net.trainable_params(), learning_rate=step_lrs,
                         momentum=0.9, loss_scale=0.5)
    train_net = TrainOneStepCell(NetWithLoss(net, strategy3), optimizer)
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    compile_net(train_net, x, b)
def test_lars():
    """Compile semi-auto-parallel training with a LARS-wrapped Momentum optimizer."""

    class Net(nn.Cell):
        # Sharded matmul (transposed rhs) followed by ReLU; w1 is the trainable weight.
        def __init__(self, strategy1, strategy2, weight):
            super().__init__()
            self.weight = Parameter(weight, "w1")
            self.matmul = P.MatMul(transpose_a=False, transpose_b=True).set_strategy(strategy1)
            self.relu = P.ReLU().set_strategy(strategy2)

        def construct(self, x):
            return self.relu(self.matmul(x, self.weight))

    context.set_auto_parallel_context(device_num=4, global_rank=0)
    strategy1 = ((2, 1), (2, 1))
    strategy2 = ((4, 1), )
    strategy3 = ((4, 1), (4, 1))
    x = Tensor(np.ones([64, 32]), dtype=ms.float32)
    weight = Tensor(np.ones([64, 32]), dtype=ms.float32)
    b = Tensor(np.ones([64, 64]), dtype=ms.float32)
    net = Net(strategy1, strategy2, weight)
    lr = Tensor(np.ones([6]), dtype=ms.float32)
    base_opt = Momentum(net.trainable_params(), lr, 0.9)
    # Batch-norm parameters are excluded from both decay and LARS adaptation.
    optimizer = LARS(base_opt, epsilon=1e-08, hyperpara=0.02,
                     decay_filter=lambda x: 'bn' not in x.name,
                     lars_filter=lambda x: 'bn' not in x.name)
    train_net = TrainOneStepCell(NetWithLoss(net, strategy3), optimizer)
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel")
    compile_net(train_net, x, b)
def _get_optimizer(args_opt, network):
    """Build the BERT optimizer named by cfg.optimizer: Lamb, Momentum, or AdamWeightDecay.

    Args:
        args_opt: run arguments; train_steps sets the decay horizon.
        network: network whose trainable parameters are optimized.

    Raises:
        ValueError: if cfg.optimizer is not one of the supported names.
    """
    if cfg.optimizer == 'Lamb':
        schedule = BertLearningRate(learning_rate=cfg.Lamb.learning_rate,
                                    end_learning_rate=cfg.Lamb.end_learning_rate,
                                    warmup_steps=cfg.Lamb.warmup_steps,
                                    decay_steps=args_opt.train_steps,
                                    power=cfg.Lamb.power)
        params = network.trainable_params()
        decay_params = [p for p in params if cfg.Lamb.decay_filter(p)]
        other_params = [p for p in params if not cfg.Lamb.decay_filter(p)]
        groups = [{'params': decay_params, 'weight_decay': cfg.Lamb.weight_decay},
                  {'params': other_params},
                  {'order_params': params}]
        optimizer = Lamb(groups, learning_rate=schedule, eps=cfg.Lamb.eps)
    elif cfg.optimizer == 'Momentum':
        optimizer = Momentum(network.trainable_params(),
                             learning_rate=cfg.Momentum.learning_rate,
                             momentum=cfg.Momentum.momentum)
    elif cfg.optimizer == 'AdamWeightDecay':
        schedule = BertLearningRate(learning_rate=cfg.AdamWeightDecay.learning_rate,
                                    end_learning_rate=cfg.AdamWeightDecay.end_learning_rate,
                                    warmup_steps=cfg.AdamWeightDecay.warmup_steps,
                                    decay_steps=args_opt.train_steps,
                                    power=cfg.AdamWeightDecay.power)
        params = network.trainable_params()
        decay_params = [p for p in params if cfg.AdamWeightDecay.decay_filter(p)]
        other_params = [p for p in params if not cfg.AdamWeightDecay.decay_filter(p)]
        groups = [{'params': decay_params, 'weight_decay': cfg.AdamWeightDecay.weight_decay},
                  {'params': other_params, 'weight_decay': 0.0},
                  {'order_params': params}]
        optimizer = AdamWeightDecay(groups, learning_rate=schedule, eps=cfg.AdamWeightDecay.eps)
    else:
        raise ValueError("Don't support optimizer {}, only support [Lamb, Momentum, AdamWeightDecay]".
                         format(cfg.optimizer))
    return optimizer
def test_trainTensor_with_new_interface(num_classes=10, epoch=8, batch_size=1):
    """Train ResNet50 via ForwardValueAndGrad and a manual optimizer step per iteration."""
    net = resnet50(num_classes)
    criterion = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    net_with_criterion = WithLossCell(net, criterion)
    net_with_criterion.set_train()
    weights = ParameterTuple(filter(lambda x: x.requires_grad, net.get_parameters()))
    optimizer = Momentum(weights, 0.1, 0.9)
    train_network = ForwardValueAndGrad(network=net_with_criterion, weights=weights,
                                        get_by_list=True, sens_param=True)
    losses = []
    for _ in range(epoch):
        batch = Tensor(np.ones([batch_size, 3, 224, 224]).astype(np.float32) * 0.01)
        target = Tensor(np.ones([batch_size]).astype(np.int32))
        loss, grads = train_network(batch, target, 1.0)
        # Identity keeps the gradient tuple structure before applying the update.
        optimizer(F.identity(grads))
        losses.append(loss)
    assert losses[-1].asnumpy() < 0.8
def test_row_tensor_model_train():
    """Model.train compiles a cell whose construct takes (inputs, label) but never reads label."""

    class Net(nn.Cell):
        def __init__(self, in_features, out_features):
            super(Net, self).__init__()
            self.weight = Parameter(
                Tensor(np.ones([out_features, in_features]).astype(np.float32)),
                name="weight")
            self.add = P.TensorAdd()
            self.cast = P.Cast()
            self.flag = True

        def construct(self, inputs, label):
            out = self.add(inputs, self.weight)
            if self.flag:
                out = self.cast(out, mstype.float32)
            return out

    dataset = MindDataSet((np.float32, np.float32), ((16, 16), (16, 16)))
    net = Net(16, 16)
    net.set_train()
    optimizer = Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
    model = Model(net, optimizer=optimizer)
    model.train(2, dataset, dataset_sink_mode=False)