# Imports assumed by this test (MindSpore 1.x layout); create_dataset, get_lr,
# Monitor, mobilenetV2, CrossEntropyWithLabelSmooth, config_ascend_quant and
# dataset_path are project-local helpers defined alongside the test.
import numpy as np

from mindspore import context, nn, Tensor
from mindspore.common import set_seed
from mindspore.compression.quant import QuantizationAwareTraining
from mindspore.train.model import Model


def test_mobilenetv2_quant():
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    config = config_ascend_quant
    print("training configure: {}".format(config))
    epoch_size = config.epoch_size

    # define network
    network = mobilenetV2(num_classes=config.num_classes)

    # define loss
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(
            smooth_factor=config.label_smooth, num_classes=config.num_classes)
    else:
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    # define dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # convert fusion network to quantization aware network
    quantizer = QuantizationAwareTraining(bn_fold=True,
                                          per_channel=[True, False],
                                          symmetric=[True, False])
    network = quantizer.quantize(network)

    # get learning rate
    lr = Tensor(get_lr(global_step=config.start_epoch * step_size,
                       lr_init=0,
                       lr_end=0,
                       lr_max=config.lr,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=epoch_size + config.start_epoch,
                       steps_per_epoch=step_size))

    # define optimization
    opt = nn.Momentum(filter(lambda x: x.requires_grad, network.get_parameters()),
                      lr, config.momentum, config.weight_decay)

    # define model
    model = Model(network, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)
    callback = [monitor]
    model.train(epoch_size, dataset, callbacks=callback,
                dataset_sink_mode=False)
    print("============== End Training ==============")

    # per-step time budget (milliseconds) and loss bound for this smoke test
    export_time_used = 650
    train_time = monitor.step_mseconds
    print('train_time_used:{}'.format(train_time))
    assert train_time < export_time_used
    expect_avg_step_loss = 2.32
    avg_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < expect_avg_step_loss
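# `get_lr` is a project-local helper; below is a minimal sketch of the schedule
# its call sites here suggest (linear warmup to lr_max, then linear decay to
# lr_end), not the exact implementation; the real helper also supports the
# 'poly' and 'cosine' decay modes used in the other scripts in this section.
def get_lr_sketch(global_step, lr_init, lr_end, lr_max, warmup_epochs,
                  total_epochs, steps_per_epoch):
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs
    decay_steps = max(total_steps - warmup_steps, 1)
    lr_each_step = []
    for i in range(total_steps):
        if i < warmup_steps:
            # ramp linearly from lr_init up to lr_max during warmup
            lr = lr_init + (lr_max - lr_init) * i / warmup_steps
        else:
            # decay linearly from lr_max down to lr_end afterwards
            lr = lr_max - (lr_max - lr_end) * (i - warmup_steps) / decay_steps
        lr_each_step.append(lr)
    # callers drop the steps already taken before wrapping the array in a Tensor
    return np.array(lr_each_step[global_step:], dtype=np.float32)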
# Excerpt from a training script; net, args_opt, config and epoch_size are
# defined earlier in the original file.
loss = SoftmaxCrossEntropyWithLogits(sparse=True)
if args_opt.do_train:
    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=True,
                             repeat_num=epoch_size,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    loss_scale = FixedLossScaleManager(config.loss_scale,
                                       drop_overflow_update=False)
    lr = Tensor(get_lr(global_step=0,
                       lr_init=config.lr_init,
                       lr_end=config.lr_end,
                       lr_max=config.lr_max,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=epoch_size,
                       steps_per_epoch=step_size,
                       lr_decay_mode='poly'))
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   lr, config.momentum, config.weight_decay, config.loss_scale)

    model = Model(net, loss_fn=loss, optimizer=opt,
                  loss_scale_manager=loss_scale, metrics={'acc'})

    time_cb = TimeMonitor(data_size=step_size)
    loss_cb = LossMonitor()
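    # Assumed continuation of this excerpt: the two monitors are presumably
    # passed to model.train, matching the training loop used elsewhere here.
    model.train(epoch_size, dataset, callbacks=[time_cb, loss_cb])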
# Imports as in the mobilenetv2 test above; resnet50_quant, CrossEntropy,
# config_quant and `quant` (MindSpore's quantization module, whose import path
# varied across 1.x versions) are assumed to be available in the test's scope.
def test_resnet50_quant():
    set_seed(1)
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    config = config_quant
    print("training configure: {}".format(config))
    epoch_size = config.epoch_size

    # define network
    net = resnet50_quant(class_num=config.class_num)
    net.set_train(True)

    # define loss
    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=config.label_smooth_factor,
                        num_classes=config.class_num)
    # loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)

    # define dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             config=config,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # convert fusion network to quantization aware network
    net = quant.convert_quant_network(net,
                                      bn_fold=True,
                                      per_channel=[True, False],
                                      symmetric=[True, False])

    # get learning rate
    lr = Tensor(get_lr(lr_init=config.lr_init,
                       lr_end=0.0,
                       lr_max=config.lr_max,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=config.epoch_size,
                       steps_per_epoch=step_size,
                       lr_decay_mode='cosine'))

    # define optimization
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   lr, config.momentum, config.weight_decay, config.loss_scale)

    # define model
    # model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale, metrics={'acc'})
    model = Model(net, loss_fn=loss, optimizer=opt)

    print("============== Starting Training ==============")
    monitor = Monitor(lr_init=lr.asnumpy(),
                      step_threshold=config.step_threshold)
    callbacks = [monitor]
    model.train(epoch_size, dataset, callbacks=callbacks,
                dataset_sink_mode=False)
    print("============== End Training ==============")

    expect_avg_step_loss = 2.40
    avg_step_loss = np.mean(np.array(monitor.losses))
    print("average step loss:{}".format(avg_step_loss))
    assert avg_step_loss < expect_avg_step_loss
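# `Monitor` is a project-local callback; a minimal sketch of the behavior the
# two quantization tests above rely on: `losses` collects the per-step loss and
# `step_mseconds` holds the last measured step time in milliseconds. The exact
# semantics of `step_threshold` are assumed (stop once that many steps ran).
import time

from mindspore.train.callback import Callback


class MonitorSketch(Callback):
    """Record per-step loss and wall time during model.train."""

    def __init__(self, lr_init=None, step_threshold=None):
        super(MonitorSketch, self).__init__()
        self.lr_init = lr_init
        self.step_threshold = step_threshold
        self.losses = []
        self.step_mseconds = 0.0
        self._step_start = 0.0

    def step_begin(self, run_context):
        self._step_start = time.time()

    def step_end(self, run_context):
        cb_params = run_context.original_args()
        self.step_mseconds = (time.time() - self._step_start) * 1000
        # assumes net_outputs is the scalar loss Tensor
        self.losses.append(float(cb_params.net_outputs.asnumpy()))
        if self.step_threshold and cb_params.cur_step_num >= self.step_threshold:
            run_context.request_stop()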
def train(net, ckpt_save_dir, target):  # pylint: disable=too-many-locals
    """Train the network."""
    # create dataset
    train_dataset = create_dataset(dataset_path=ARGS_OPT.train_dataset,
                                   do_train=True,
                                   repeat_num=1,
                                   batch_size=config.batch_size,  # pylint: disable=no-member
                                   target=target)
    step_size = train_dataset.get_dataset_size()

    # init lr
    learning_rate = get_lr(
        lr_init=config.lr_init, lr_end=config.lr_end,  # pylint: disable=no-member
        lr_max=config.lr_max, warmup_epochs=config.warmup_epochs,  # pylint: disable=no-member
        total_epochs=config.epoch_size,  # pylint: disable=no-member
        steps_per_epoch=step_size,
        lr_decay_mode=config.lr_decay_mode)  # pylint: disable=no-member
    learning_rate = Tensor(learning_rate)

    # define opt: apply weight decay only to the weights, not to
    # batchnorm parameters (beta/gamma) or biases
    decayed_params = []
    no_decayed_params = []
    for param in net.trainable_params():
        if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
            decayed_params.append(param)
        else:
            no_decayed_params.append(param)

    group_params = [
        {'params': decayed_params, 'weight_decay': config.weight_decay},  # pylint: disable=no-member
        {'params': no_decayed_params},
        {'order_params': net.trainable_params()}
    ]
    opt = Momentum(group_params, learning_rate, config.momentum,  # pylint: disable=no-member
                   loss_scale=config.loss_scale)  # pylint: disable=no-member

    # define loss, model
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)  # pylint: disable=no-member
    model = Model(net,
                  loss_fn=loss,
                  optimizer=opt,
                  loss_scale_manager=loss_scale,
                  metrics={'top_1_accuracy', 'top_5_accuracy'},
                  amp_level="O2",
                  keep_batchnorm_fp32=False)

    # define callbacks
    time_cb = TimeMonitor(data_size=step_size)
    loss_cb = LossMonitor()
    callbacks = [time_cb, loss_cb]
    if config.save_checkpoint:  # pylint: disable=no-member
        config_ck = CheckpointConfig(
            save_checkpoint_steps=config.save_checkpoint_epochs * step_size,  # pylint: disable=no-member
            keep_checkpoint_max=config.keep_checkpoint_max)  # pylint: disable=no-member
        ckpt_cb = ModelCheckpoint(prefix="resnet",
                                  directory=ckpt_save_dir,
                                  config=config_ck)
        callbacks += [ckpt_cb]

    # define the eval callback
    epochs_per_eval = {"epoch": [], "acc": []}
    if not ARGS_OPT.run_distribute:
        eval_dataset = create_dataset(
            dataset_path=ARGS_OPT.eval_dataset,
            do_train=False,
            batch_size=config.batch_size,  # pylint: disable=no-member
            target=target)
        eval_cb = EvalCallBack(model, eval_dataset, 1, epochs_per_eval)
        callbacks.append(eval_cb)

    # start training the quantization-aware training network
    model.train(config.epoch_size,  # pylint: disable=no-member
                train_dataset,
                callbacks=callbacks,
                sink_size=train_dataset.get_dataset_size(),
                dataset_sink_mode=False)

    if not ARGS_OPT.run_distribute:
        print("***************** evaluation results of training process *****************")
        print(epochs_per_eval)
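# `EvalCallBack` is project-local; a minimal sketch of what train() above
# assumes it does: every `eval_interval` epochs, run model.eval on the held-out
# dataset and record the result in the shared epochs_per_eval dict. The metric
# key is assumed to be 'top_1_accuracy', matching the metrics defined above.
from mindspore.train.callback import Callback


class EvalCallBackSketch(Callback):
    """Evaluate the model periodically during training."""

    def __init__(self, model, eval_dataset, eval_interval, epochs_per_eval):
        super(EvalCallBackSketch, self).__init__()
        self.model = model
        self.eval_dataset = eval_dataset
        self.eval_interval = eval_interval
        self.epochs_per_eval = epochs_per_eval

    def epoch_end(self, run_context):
        cb_params = run_context.original_args()
        cur_epoch = cb_params.cur_epoch_num
        if cur_epoch % self.eval_interval == 0:
            acc = self.model.eval(self.eval_dataset, dataset_sink_mode=False)
            self.epochs_per_eval["epoch"].append(cur_epoch)
            self.epochs_per_eval["acc"].append(acc["top_1_accuracy"])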
dataset = create_dataset(dataset_path=args_opt.dataset_path,
                         do_train=True,
                         repeat_num=epoch_size,
                         batch_size=config.batch_size)
step_size = dataset.get_dataset_size()

if args_opt.pre_trained:
    param_dict = load_checkpoint(args_opt.pre_trained)
    load_param_into_net(net, param_dict)

loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
lr = Tensor(get_lr(global_step=0,
                   lr_init=0,
                   lr_end=0,
                   lr_max=config.lr,
                   warmup_epochs=config.warmup_epochs,
                   total_epochs=epoch_size,
                   steps_per_epoch=step_size))
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
               lr, config.momentum, config.weight_decay, config.loss_scale)

model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale)

cb = None
if rank_id == 0:
    cb = [Monitor(lr_init=lr.asnumpy())]
    if config.save_checkpoint:
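        # Assumed completion of the truncated excerpt, mirroring the
        # checkpoint wiring in the resnet train() function above; the prefix
        # and save path are illustrative, not taken from the original script.
        config_ck = CheckpointConfig(
            save_checkpoint_steps=config.save_checkpoint_epochs * step_size,
            keep_checkpoint_max=config.keep_checkpoint_max)
        ckpt_cb = ModelCheckpoint(prefix="mobilenetV2",
                                  directory=config.save_checkpoint_path,
                                  config=config_ck)
        cb += [ckpt_cb]
model.train(epoch_size, dataset, callbacks=cb)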
# (excerpt begins mid-statement: the construction of `loss` above is elided)
                num_classes=config.class_num)
if args_opt.do_train:
    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=True,
                             repeat_num=epoch_size,
                             batch_size=config.batch_size,
                             target=target)
    step_size = dataset.get_dataset_size()

    loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
    lr = get_lr(lr_init=config.lr_init,
                lr_end=0.0,
                lr_max=config.lr_max,
                warmup_epochs=config.warmup_epochs,
                total_epochs=config.epoch_size,
                steps_per_epoch=step_size,
                lr_decay_mode='cosine')
    if args_opt.pre_trained:
        # skip the schedule steps already covered by the pretrained checkpoint
        lr = lr[config.pretrained_epoch_size * step_size:]
    lr = Tensor(lr)
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   lr, config.momentum, config.weight_decay, config.loss_scale)

    if target == "Ascend":
        model = Model(net, loss_fn=loss, optimizer=opt,
                      loss_scale_manager=loss_scale,
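                      # assumed completion of the truncated call, mirroring the
                      # Ascend setup in train() above (O2 mixed precision)
                      metrics={'acc'},
                      amp_level="O2", keep_batchnorm_fp32=False)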
eval_dataset = create_dataset(dataset_path=args.eval_path,
                              do_train=False,
                              repeat_num=epoch_size // eval_interval,
                              batch_size=args.eval_batch_size)
eval_step_size = eval_dataset.get_dataset_size()

# loss scale
loss_scale = FixedLossScaleManager(args.loss_scale, drop_overflow_update=False)

# learning rate
lr_array = get_lr(global_step=0,
                  lr_init=args.lr_init,
                  lr_end=args.lr_min,
                  lr_max=args.lr_max,
                  warmup_epochs=args.warmup_epochs,
                  total_epochs=epoch_size,
                  steps_per_epoch=step_size,
                  lr_decay_mode=args.lr_decay_mode,
                  poly_power=args.poly_power)
lr = Tensor(lr_array)

# optimizer
decayed_params = list(
    filter(lambda x: 'beta' not in x.name and 'gamma' not in x.name and 'bias' not in x.name,
           net.trainable_params()))
no_decayed_params = [
    param for param in net.trainable_params() if param not in decayed_params
]
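# Assumed continuation of this excerpt, mirroring the grouping in train()
# above: weight decay is applied only to the decayed (weight) parameters,
# while batchnorm beta/gamma and biases are excluded.
group_params = [{'params': decayed_params, 'weight_decay': args.weight_decay},
                {'params': no_decayed_params},
                {'order_params': net.trainable_params()}]
opt = Momentum(group_params, lr, args.momentum, loss_scale=args.loss_scale)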