示例#1
0
def resnet50_train(args_opt):
    epoch_size = args_opt.epoch_size
    batch_size = cfg.batch_size
    class_num = cfg.class_num
    loss_scale_num = cfg.loss_scale
    local_data_path = '/cache/data'
    local_ckpt_path = '/cache/ckpt_file'

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=args_opt.device_target,
                        save_graphs=False)

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # create dataset
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path,
                                   do_train=True,
                                   repeat_num=epoch_size,
                                   batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # create model
    net = resnet50(class_num=class_num)
    # reduction='mean' means that apply reduction of mean to loss
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    lr = Tensor(
        get_lr(global_step=0,
               total_epochs=epoch_size,
               steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(),
                   lr,
                   momentum=0.9,
                   weight_decay=1e-4,
                   loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)

    # amp_level="O2" means that the hybrid precision of O2 mode is used for training
    # the whole network except that batchnorm will be cast into float16 format and dynamic loss scale will be used
    # 'keep_batchnorm_fp32 = False' means that use the float16 format
    model = Model(net,
                  amp_level="O2",
                  keep_batchnorm_fp32=False,
                  loss_fn=loss,
                  optimizer=opt,
                  loss_scale_manager=loss_scale,
                  metrics={'acc'})

    # define performance callback to show ips and loss callback to show loss for every epoch
    time_cb = TimeMonitor(data_size=train_step_size)
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [time_cb, performance_cb, loss_cb]
    config_ck = CheckpointConfig(
        save_checkpoint_steps=cfg.save_checkpoint_epochs * train_step_size,
        keep_checkpoint_max=cfg.keep_checkpoint_max)
    ckpt_cb = ModelCheckpoint(prefix="resnet",
                              directory=local_ckpt_path,
                              config=config_ck)
    cb += [ckpt_cb]

    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)

    # upload checkpoint files
    print('Upload checkpoint.')
    mox.file.copy_parallel(src_url=local_ckpt_path, dst_url=args_opt.train_url)
示例#2
0
def train():
    # set args
    dev = "GPU"
    epoch_size = int(args_opt.epoch_size)
    total_batch = int(args_opt.batch_size)
    print_per_steps = int(args_opt.print_per_steps)
    compute_type = str(args_opt.dtype).lower()
    ckpt_save_dir = str(args_opt.ckpt_path)
    save_ckpt = bool(args_opt.save_ckpt)
    device_num = 1
    # init context
    if args_opt.mode == "GRAPH":
        mode = context.GRAPH_MODE
        all_reduce_fusion_config = [85, 160]
    else:
        mode = context.PYNATIVE_MODE
        all_reduce_fusion_config = [30, 90, 160]
    context.set_context(mode=mode, device_target=dev, save_graphs=False)
    if args_opt.run_distribute:
        init()
        device_num = get_group_size()
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True,
            all_reduce_fusion_config=all_reduce_fusion_config)
        ckpt_save_dir = ckpt_save_dir + "ckpt_" + str(get_rank()) + "/"

    # create dataset
    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=True,
                             repeat_num=1,
                             batch_size=total_batch,
                             target=dev,
                             dtype=compute_type,
                             device_num=device_num)
    step_size = dataset.get_dataset_size()
    if (print_per_steps > step_size or print_per_steps < 1):
        print("Arg: print_per_steps should lessequal to dataset_size ",
              step_size)
        print("Change to default: 20")
        print_per_steps = 20
    # define net
    net = resnet(class_num=1001, dtype=compute_type)

    # init weight
    for _, cell in net.cells_and_names():
        if isinstance(cell, nn.Conv2d):
            cell.weight.set_data(
                weight_init.initializer(weight_init.XavierUniform(),
                                        cell.weight.shape, cell.weight.dtype))
        if isinstance(cell, nn.Dense):
            cell.weight.set_data(
                weight_init.initializer(weight_init.TruncatedNormal(),
                                        cell.weight.shape, cell.weight.dtype))

    # init lr
    lr = get_liner_lr(lr_init=0,
                      lr_end=0,
                      lr_max=0.8,
                      warmup_epochs=0,
                      total_epochs=epoch_size,
                      steps_per_epoch=step_size)
    lr = Tensor(lr)

    # define opt
    decayed_params = []
    no_decayed_params = []
    for param in net.trainable_params():
        if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
            decayed_params.append(param)
        else:
            no_decayed_params.append(param)

    # define loss, model
    loss = CrossEntropySmooth(sparse=True,
                              reduction='mean',
                              smooth_factor=0.1,
                              num_classes=1001)
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr,
                   0.9, 1e-4)
    loss_scale = FixedLossScaleManager(1024, drop_overflow_update=False)
    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})
    # Mixed precision
    if compute_type == "fp16":
        if mode == context.PYNATIVE_MODE:
            opt = MomentumWeightDecay(
                filter(lambda x: x.requires_grad, net.get_parameters()), lr,
                0.9, 1e-4, 1024)
        else:
            opt = Momentum(
                filter(lambda x: x.requires_grad, net.get_parameters()), lr,
                0.9, 1e-4, 1024)
        model = Model(net,
                      loss_fn=loss,
                      optimizer=opt,
                      loss_scale_manager=loss_scale,
                      metrics={'acc'},
                      amp_level="O2",
                      keep_batchnorm_fp32=False)
    # define callbacks
    if mode == context.PYNATIVE_MODE:
        print_per_steps = 1
    time_cb = MyTimeMonitor(total_batch, print_per_steps, step_size, mode)
    cb = [time_cb]
    if save_ckpt:
        config_ck = CheckpointConfig(save_checkpoint_steps=5 * step_size,
                                     keep_checkpoint_max=5)
        ckpt_cb = ModelCheckpoint(prefix="resnet_benchmark",
                                  directory=ckpt_save_dir,
                                  config=config_ck)
        cb += [ckpt_cb]
    # train model
    print("========START RESNET50 GPU BENCHMARK========")
    if mode == context.GRAPH_MODE:
        model.train(int(epoch_size * step_size / print_per_steps),
                    dataset,
                    callbacks=cb,
                    sink_size=print_per_steps)
    else:
        model.train(epoch_size, dataset, callbacks=cb)
示例#3
0
    if args.dataset == "cifar10":
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
        model = Model(network,
                      loss_fn=loss,
                      optimizer=opt,
                      metrics={'acc'},
                      amp_level="O2",
                      keep_batchnorm_fp32=False,
                      loss_scale_manager=None)
    else:
        if not args.label_smooth:
            args.label_smooth_factor = 0.0
        loss = CrossEntropy(smooth_factor=args.label_smooth_factor,
                            num_classes=args.num_classes)

        loss_scale_manager = FixedLossScaleManager(args.loss_scale,
                                                   drop_overflow_update=False)
        model = Model(network,
                      loss_fn=loss,
                      optimizer=opt,
                      loss_scale_manager=loss_scale_manager,
                      amp_level="O2")

    # define callbacks
    time_cb = TimeMonitor(data_size=batch_num)
    loss_cb = LossMonitor(per_print_times=batch_num)
    callbacks = [time_cb, loss_cb]
    if args.rank_save_ckpt_flag:
        ckpt_config = CheckpointConfig(
            save_checkpoint_steps=args.ckpt_interval * args.steps_per_epoch,
            keep_checkpoint_max=args.ckpt_save_max)
        save_ckpt_path = os.path.join(args.outputs_dir,
示例#4
0
                               args.group_size)

    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch

    lr = get_lr(args)

    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)
    is_gpu = context.get_context("device_target") == "GPU"
    if is_gpu:
        loss_scale_value = 1.0
        loss_scale = FixedLossScaleManager(loss_scale_value,
                                           drop_overflow_update=False)
        network = amp.build_train_network(network,
                                          optimizer=opt,
                                          loss_scale_manager=loss_scale,
                                          level="O2",
                                          keep_batchnorm_fp32=False)
        keep_loss_fp32(network)
    else:
        network = TrainingWrapper(network, opt)
        network.set_train()

    if args.rank_save_ckpt_flag:
        # checkpoint save
        ckpt_max_num = 10
        ckpt_config = CheckpointConfig(
            save_checkpoint_steps=args.ckpt_interval,
示例#5
0
文件: train.py 项目: xyg320/mindspore
                is_grad=False, sparse=True, reduction='mean')
        # define dataset
        epoch_size = config_gpu.epoch_size
        dataset = create_dataset(dataset_path=args_opt.dataset_path,
                                 do_train=True,
                                 config=config_gpu,
                                 platform=args_opt.platform,
                                 repeat_num=epoch_size,
                                 batch_size=config_gpu.batch_size)
        step_size = dataset.get_dataset_size()
        # resume
        if args_opt.pre_trained:
            param_dict = load_checkpoint(args_opt.pre_trained)
            load_param_into_net(net, param_dict)
        # define optimizer
        loss_scale = FixedLossScaleManager(
            config_gpu.loss_scale, drop_overflow_update=False)
        lr = Tensor(get_lr(global_step=0,
                           lr_init=0,
                           lr_end=0,
                           lr_max=config_gpu.lr,
                           warmup_epochs=config_gpu.warmup_epochs,
                           total_epochs=epoch_size,
                           steps_per_epoch=step_size))
        opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config_gpu.momentum,
                       config_gpu.weight_decay, config_gpu.loss_scale)
        # define model
        model = Model(net, loss_fn=loss, optimizer=opt,
                      loss_scale_manager=loss_scale)

        cb = [Monitor(lr_init=lr.asnumpy())]
        if config_gpu.save_checkpoint:
示例#6
0
def train_process(q, device_id, epoch_size, device_num, enable_hccl):
    os.system("mkdir " + str(device_id))
    os.chdir(str(device_id))
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        save_graphs=False)
    context.set_context(device_id=device_id)
    os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = MINDSPORE_HCCL_CONFIG_PATH
    os.environ['RANK_ID'] = str(device_id)
    os.environ['RANK_SIZE'] = str(device_num)
    if enable_hccl:
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            mirror_mean=True,
            parameter_broadcast=True)
        auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160])
        init()

    # network
    net = resnet50(class_num=config.class_num)

    # evaluation network
    dist_eval_network = ClassifyCorrectCell(net)

    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0

    # loss
    loss = nn.SoftmaxCrossEntropyWithLogits(
        sparse=True,
        reduction="mean",
        smooth_factor=config.label_smooth_factor,
        num_classes=config.class_num)

    # train dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             do_train=True,
                             repeat_num=epoch_size,
                             batch_size=config.batch_size)

    step_size = dataset.get_dataset_size()
    eval_interval = config.eval_interval
    dataset.__loop_size__ = step_size * eval_interval

    # evalutation dataset
    eval_dataset = create_dataset(dataset_path=eval_path,
                                  do_train=False,
                                  repeat_num=epoch_size,
                                  batch_size=config.eval_batch_size)

    # loss scale
    loss_scale = FixedLossScaleManager(config.loss_scale,
                                       drop_overflow_update=False)

    # learning rate
    lr = Tensor(
        get_learning_rate(lr_init=config.lr_init,
                          lr_end=0.0,
                          lr_max=config.lr_max,
                          warmup_epochs=config.warmup_epochs,
                          total_epochs=config.epoch_size,
                          steps_per_epoch=step_size,
                          lr_decay_mode=config.lr_decay_mode))

    # optimizer
    decayed_params = list(
        filter(
            lambda x: 'beta' not in x.name and 'gamma' not in x.name and 'bias'
            not in x.name, net.trainable_params()))
    no_decayed_params = [
        param for param in net.trainable_params()
        if param not in decayed_params
    ]
    group_params = [{
        'params': decayed_params,
        'weight_decay': config.weight_decay
    }, {
        'params': no_decayed_params,
        'weight_decay': 0.0
    }, {
        'order_params': net.trainable_params()
    }]

    if config.use_lars:
        momentum = nn.Momentum(group_params,
                               lr,
                               config.momentum,
                               loss_scale=config.loss_scale,
                               use_nesterov=config.use_nesterov)
        opt = nn.LARS(momentum,
                      epsilon=config.lars_epsilon,
                      coefficient=config.lars_coefficient,
                      lars_filter=lambda x: 'beta' not in x.name and 'gamma'
                      not in x.name and 'bias' not in x.name)

    else:
        opt = nn.Momentum(group_params,
                          lr,
                          config.momentum,
                          loss_scale=config.loss_scale,
                          use_nesterov=config.use_nesterov)

    # model
    model = Model(net,
                  loss_fn=loss,
                  optimizer=opt,
                  loss_scale_manager=loss_scale,
                  amp_level="O2",
                  keep_batchnorm_fp32=False,
                  metrics={
                      'acc':
                      DistAccuracy(batch_size=config.eval_batch_size,
                                   device_num=device_num)
                  },
                  eval_network=dist_eval_network)

    # model init
    print("init_start", device_id)
    model.init(dataset, eval_dataset)
    print("init_stop", device_id)

    # callbacks
    loss_cb = LossGet(1, step_size)

    # train and eval
    print("run_start", device_id)
    acc = 0.0
    time_cost = 0.0
    for epoch_idx in range(0, int(epoch_size / eval_interval)):
        model.train(1, dataset, callbacks=loss_cb)
        eval_start = time.time()
        output = model.eval(eval_dataset)
        eval_cost = (time.time() - eval_start) * 1000
        acc = float(output["acc"])
        time_cost = loss_cb.get_per_step_time()
        loss = loss_cb.get_loss()
        print(
            "the {} epoch's resnet result:\n "
            "device{}, training loss {}, acc {}, "
            "training per step cost {:.2f} ms, eval cost {:.2f} ms, total_cost {:.2f} ms"
            .format(epoch_idx, device_id, loss, acc, time_cost, eval_cost,
                    time_cost * step_size + eval_cost))
    q.put({'acc': acc, 'cost': time_cost})
示例#7
0
def main():
    args, _ = parser.parse_known_args()
    rank_id, rank_size = 0, 1

    context.set_context(mode=context.GRAPH_MODE)

    if args.distributed:
        if args.GPU:
            init("nccl")
            context.set_context(device_target='GPU')
        else:
            raise ValueError("Only supported GPU training.")
        context.reset_auto_parallel_context()
        rank_id = get_rank()
        rank_size = get_group_size()
        context.set_auto_parallel_context(
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True,
            device_num=rank_size)
    else:
        if args.GPU:
            context.set_context(device_target='GPU')
        else:
            raise ValueError("Only supported GPU training.")

    net = efficientnet_b0(
        num_classes=cfg.num_classes,
        drop_rate=cfg.drop,
        drop_connect_rate=cfg.drop_connect,
        global_pool=cfg.gp,
        bn_tf=cfg.bn_tf,
    )

    train_data_url = args.data_path
    train_dataset = create_dataset(cfg.batch_size,
                                   train_data_url,
                                   workers=cfg.workers,
                                   distributed=args.distributed)
    batches_per_epoch = train_dataset.get_dataset_size()

    loss_cb = LossMonitor(per_print_times=batches_per_epoch)
    loss = LabelSmoothingCrossEntropy(smooth_factor=cfg.smoothing)
    time_cb = TimeMonitor(data_size=batches_per_epoch)
    loss_scale_manager = FixedLossScaleManager(cfg.loss_scale,
                                               drop_overflow_update=False)

    callbacks = [time_cb, loss_cb]

    if cfg.save_checkpoint:
        config_ck = CheckpointConfig(
            save_checkpoint_steps=batches_per_epoch,
            keep_checkpoint_max=cfg.keep_checkpoint_max)
        ckpoint_cb = ModelCheckpoint(prefix=cfg.model,
                                     directory='./ckpt_' + str(rank_id) + '/',
                                     config=config_ck)
        callbacks += [ckpoint_cb]

    lr = Tensor(
        get_lr(base_lr=cfg.lr,
               total_epochs=cfg.epochs,
               steps_per_epoch=batches_per_epoch,
               decay_steps=cfg.decay_epochs,
               decay_rate=cfg.decay_rate,
               warmup_steps=cfg.warmup_epochs,
               warmup_lr_init=cfg.warmup_lr_init,
               global_epoch=cfg.resume_start_epoch))
    if cfg.opt == 'sgd':
        optimizer = SGD(net.trainable_params(),
                        learning_rate=lr,
                        momentum=cfg.momentum,
                        weight_decay=cfg.weight_decay,
                        loss_scale=cfg.loss_scale)
    elif cfg.opt == 'rmsprop':
        optimizer = RMSProp(net.trainable_params(),
                            learning_rate=lr,
                            decay=0.9,
                            weight_decay=cfg.weight_decay,
                            momentum=cfg.momentum,
                            epsilon=cfg.opt_eps,
                            loss_scale=cfg.loss_scale)

    loss.add_flags_recursive(fp32=True, fp16=False)

    if args.resume:
        ckpt = load_checkpoint(args.resume)
        load_param_into_net(net, ckpt)

    model = Model(net,
                  loss,
                  optimizer,
                  loss_scale_manager=loss_scale_manager,
                  amp_level=cfg.amp_level)

    #    callbacks = callbacks if is_master else []

    if args.resume:
        real_epoch = cfg.epochs - cfg.resume_start_epoch
        model.train(real_epoch,
                    train_dataset,
                    callbacks=callbacks,
                    dataset_sink_mode=True)
    else:
        model.train(cfg.epochs,
                    train_dataset,
                    callbacks=callbacks,
                    dataset_sink_mode=True)
def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl):
    os.system("mkdir " + str(device_id))
    os.chdir(str(device_id))
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        save_graphs=False)
    context.set_context(device_id=device_id)
    os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = MINDSPORE_HCCL_CONFIG_PATH_2
    os.environ['RANK_ID'] = str(device_id - 4)
    os.environ['RANK_SIZE'] = str(device_num)
    if enable_hccl:
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True,
            all_reduce_fusion_config=[85, 160])
        init()

    # network
    net = resnet50_thor(thor_config.class_num)

    if not thor_config.label_smooth:
        thor_config.label_smooth_factor = 0.0

    # loss
    loss = CrossEntropySmooth(sparse=True,
                              reduction="mean",
                              smooth_factor=thor_config.label_smooth_factor,
                              num_classes=thor_config.class_num)

    # train dataset
    dataset = create_dataset_thor(dataset_path=dataset_path,
                                  do_train=True,
                                  repeat_num=1,
                                  batch_size=thor_config.batch_size)

    step_size = dataset.get_dataset_size()
    eval_interval = thor_config.eval_interval

    # evaluation dataset
    eval_dataset = create_dataset(dataset_path=eval_path,
                                  do_train=False,
                                  repeat_num=1,
                                  batch_size=thor_config.eval_batch_size)

    # loss scale
    loss_scale = FixedLossScaleManager(thor_config.loss_scale,
                                       drop_overflow_update=False)

    # learning rate
    lr = get_thor_lr(0, 0.05803, 4.04839, 53, 5004, decay_epochs=39)
    damping = get_thor_damping(0, 0.02714, 0.50036, 70, 5004)
    # optimizer
    split_indices = [26, 53]
    opt = THOR(net,
               Tensor(lr),
               Tensor(damping),
               thor_config.momentum,
               thor_config.weight_decay,
               thor_config.loss_scale,
               thor_config.batch_size,
               split_indices=split_indices)

    # evaluation network
    dist_eval_network = ClassifyCorrectCell(net)
    # model
    model = THOR_Model(net,
                       loss_fn=loss,
                       optimizer=opt,
                       loss_scale_manager=loss_scale,
                       amp_level="O2",
                       keep_batchnorm_fp32=False,
                       metrics={
                           'acc':
                           DistAccuracy(batch_size=thor_config.eval_batch_size,
                                        device_num=device_num)
                       },
                       eval_network=dist_eval_network,
                       frequency=thor_config.frequency)

    # model init
    print("init_start", device_id)
    model.init(dataset, eval_dataset)
    print("init_stop", device_id)

    # callbacks
    loss_cb = LossGet(1, step_size)

    # train and eval
    acc = 0.0
    time_cost = 0.0
    print("run_start", device_id)
    for epoch_idx in range(0, int(epoch_size / eval_interval)):
        model.train(eval_interval, dataset, callbacks=loss_cb)
        eval_start = time.time()
        output = model.eval(eval_dataset)
        eval_cost = (time.time() - eval_start) * 1000
        acc = float(output["acc"])
        time_cost = loss_cb.get_per_step_time()
        loss = loss_cb.get_loss()
        print(
            "the {} epoch's resnet result:\n "
            "device{}, training loss {}, acc {}, "
            "training per step cost {:.2f} ms, eval cost {:.2f} ms, total_cost {:.2f} ms"
            .format(epoch_idx, device_id, loss, acc, time_cost, eval_cost,
                    time_cost * step_size + eval_cost))
    q.put({'acc': acc, 'cost': time_cost})
示例#9
0
def train(cloud_args=None):
    """training process"""
    args = parse_args(cloud_args)
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.platform,
                        save_graphs=False)
    if os.getenv('DEVICE_ID', "not_set").isdigit():
        context.set_context(device_id=int(os.getenv('DEVICE_ID')))

    # init distributed
    if args.is_distributed:
        if args.platform == "Ascend":
            init()
        else:
            init("nccl")
        args.rank = get_rank()
        args.group_size = get_group_size()
        parallel_mode = ParallelMode.DATA_PARALLEL
        context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                          device_num=args.group_size,
                                          parameter_broadcast=True,
                                          mirror_mean=True)
    else:
        args.rank = 0
        args.group_size = 1

    if args.is_dynamic_loss_scale == 1:
        args.loss_scale = 1  # for dynamic loss scale can not set loss scale in momentum opt

    # select for master rank save ckpt or all rank save, compatiable for model parallel
    args.rank_save_ckpt_flag = 0
    if args.is_save_on_master:
        if args.rank == 0:
            args.rank_save_ckpt_flag = 1
    else:
        args.rank_save_ckpt_flag = 1

    # logger
    args.outputs_dir = os.path.join(
        args.ckpt_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    args.logger = get_logger(args.outputs_dir, args.rank)

    # dataloader
    de_dataset = classification_dataset(args.data_dir,
                                        args.image_size,
                                        args.per_batch_size,
                                        1,
                                        args.rank,
                                        args.group_size,
                                        num_parallel_workers=8)
    de_dataset.map_model = 4  # !!!important
    args.steps_per_epoch = de_dataset.get_dataset_size()

    args.logger.save_args(args)

    # network
    args.logger.important_info('start create network')
    # get network and init
    network = get_network(args.backbone,
                          args.num_classes,
                          platform=args.platform)
    if network is None:
        raise NotImplementedError('not implement {}'.format(args.backbone))

    # load pretrain model
    if os.path.isfile(args.pretrained):
        param_dict = load_checkpoint(args.pretrained)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
            else:
                param_dict_new[key] = values
        load_param_into_net(network, param_dict_new)
        args.logger.info('load model {} success'.format(args.pretrained))

    # lr scheduler
    if args.lr_scheduler == 'exponential':
        lr = warmup_step_lr(
            args.lr,
            args.lr_epochs,
            args.steps_per_epoch,
            args.warmup_epochs,
            args.max_epoch,
            gamma=args.lr_gamma,
        )
    elif args.lr_scheduler == 'cosine_annealing':
        lr = warmup_cosine_annealing_lr(args.lr, args.steps_per_epoch,
                                        args.warmup_epochs, args.max_epoch,
                                        args.T_max, args.eta_min)
    else:
        raise NotImplementedError(args.lr_scheduler)

    # optimizer
    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)

    # loss
    if not args.label_smooth:
        args.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=args.label_smooth_factor,
                        num_classes=args.num_classes)

    if args.is_dynamic_loss_scale == 1:
        loss_scale_manager = DynamicLossScaleManager(init_loss_scale=65536,
                                                     scale_factor=2,
                                                     scale_window=2000)
    else:
        loss_scale_manager = FixedLossScaleManager(args.loss_scale,
                                                   drop_overflow_update=False)

    if args.platform == "Ascend":
        model = Model(network,
                      loss_fn=loss,
                      optimizer=opt,
                      loss_scale_manager=loss_scale_manager,
                      metrics={'acc'},
                      amp_level="O3")
    else:
        auto_mixed_precision(network)
        model = Model(network,
                      loss_fn=loss,
                      optimizer=opt,
                      loss_scale_manager=loss_scale_manager,
                      metrics={'acc'})

    # checkpoint save
    progress_cb = ProgressMonitor(args)
    callbacks = [
        progress_cb,
    ]
    if args.rank_save_ckpt_flag:
        ckpt_config = CheckpointConfig(
            save_checkpoint_steps=args.ckpt_interval * args.steps_per_epoch,
            keep_checkpoint_max=args.ckpt_save_max)
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=args.outputs_dir,
                                  prefix='{}'.format(args.rank))
        callbacks.append(ckpt_cb)

    model.train(args.max_epoch,
                de_dataset,
                callbacks=callbacks,
                dataset_sink_mode=True)
示例#10
0
def resnet50_train(args_opt):
    epoch_size = args_opt.epoch_size
    batch_size = 32
    class_num = 10
    loss_scale_num = 1024
    local_data_path = '/cache/data'

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE)
    if device_num > 1:
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            mirror_mean=True)
        local_data_path = os.path.join(local_data_path, str(device_id))

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # create dataset
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path,
                                   do_train=True,
                                   repeat_num=epoch_size,
                                   batch_size=batch_size)
    eval_dataset = create_dataset(dataset_path=local_data_path,
                                  do_train=False,
                                  repeat_num=1,
                                  batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # create model
    net = resnet50(class_num=class_num)
    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
    lr = Tensor(
        get_lr(global_step=0,
               total_epochs=epoch_size,
               steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(),
                   lr,
                   momentum=0.9,
                   weight_decay=1e-4,
                   loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)

    model = Model(net,
                  loss_fn=loss,
                  optimizer=opt,
                  loss_scale_manager=loss_scale,
                  metrics={'acc'})

    # define performance callback to show ips and loss callback to show loss for every epoch
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [performance_cb, loss_cb]

    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)
    if device_num == 1 or device_id == 0:
        print(f'Start run evaluation.')
        output = model.eval(eval_dataset)
        print(f'Evaluation result: {output}.')
示例#11
0
def train():
    """Train function."""
    args = parse_args()

    devid = int(os.getenv('DEVICE_ID')) if os.getenv('DEVICE_ID') else 0
    context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=True,
                        device_target=args.device_target, save_graphs=True, device_id=devid)

    # init distributed
    if args.is_distributed:
        if args.device_target == "Ascend":
            init()
        else:
            init("nccl")
        args.rank = get_rank()
        args.group_size = get_group_size()

    # select for master rank save ckpt or all rank save, compatiable for model parallel
    args.rank_save_ckpt_flag = 0
    if args.is_save_on_master:
        if args.rank == 0:
            args.rank_save_ckpt_flag = 1
    else:
        args.rank_save_ckpt_flag = 1

    # logger
    args.outputs_dir = os.path.join(args.ckpt_path,
                                    datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    if args.need_profiler:
        from mindspore.profiler.profiling import Profiler
        profiler = Profiler(output_path=args.outputs_dir, is_detail=True, is_show_op_path=True)

    loss_meter = AverageMeter('loss')

    context.reset_auto_parallel_context()
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        degree = get_group_size()
    else:
        parallel_mode = ParallelMode.STAND_ALONE
        degree = 1
    context.set_auto_parallel_context(parallel_mode=parallel_mode, mirror_mean=True, device_num=degree)

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)

    if args.pretrained_backbone:
        network = load_backbone(network, args.pretrained_backbone, args)
        args.logger.info('load pre-trained backbone {} into network'.format(args.pretrained_backbone))
    else:
        args.logger.info('Not load pre-trained backbone, please be careful')

    if args.resume_yolov3:
        param_dict = load_checkpoint(args.resume_yolov3)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('yolo_network.'):
                param_dict_new[key[13:]] = values
                args.logger.info('in resume {}'.format(key))
            else:
                param_dict_new[key] = values
                args.logger.info('in resume {}'.format(key))

        args.logger.info('resume finished')
        load_param_into_net(network, param_dict_new)
        args.logger.info('load_model {} success'.format(args.resume_yolov3))

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config = ConfigYOLOV3DarkNet53()

    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor

    if args.training_shape:
        config.multi_scale = [conver_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    ds, data_size = create_yolo_dataset(image_dir=args.data_root, anno_path=args.annFile, is_training=True,
                                        batch_size=args.per_batch_size, max_epoch=args.max_epoch,
                                        device_num=args.group_size, rank=args.rank, config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size / args.group_size)

    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch

    # lr scheduler
    if args.lr_scheduler == 'exponential':
        lr = warmup_step_lr(args.lr,
                            args.lr_epochs,
                            args.steps_per_epoch,
                            args.warmup_epochs,
                            args.max_epoch,
                            gamma=args.lr_gamma,
                            )
    elif args.lr_scheduler == 'cosine_annealing':
        lr = warmup_cosine_annealing_lr(args.lr,
                                        args.steps_per_epoch,
                                        args.warmup_epochs,
                                        args.max_epoch,
                                        args.T_max,
                                        args.eta_min)
    elif args.lr_scheduler == 'cosine_annealing_V2':
        lr = warmup_cosine_annealing_lr_V2(args.lr,
                                           args.steps_per_epoch,
                                           args.warmup_epochs,
                                           args.max_epoch,
                                           args.T_max,
                                           args.eta_min)
    elif args.lr_scheduler == 'cosine_annealing_sample':
        lr = warmup_cosine_annealing_lr_sample(args.lr,
                                               args.steps_per_epoch,
                                               args.warmup_epochs,
                                               args.max_epoch,
                                               args.T_max,
                                               args.eta_min)
    else:
        raise NotImplementedError(args.lr_scheduler)

    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)
    enable_amp = False
    is_gpu = context.get_context("device_target") == "GPU"
    if is_gpu:
        enable_amp = True
    if enable_amp:
        loss_scale_value = 1.0
        loss_scale = FixedLossScaleManager(loss_scale_value, drop_overflow_update=False)
        network = amp.build_train_network(network, optimizer=opt, loss_scale_manager=loss_scale,
                                          level="O2", keep_batchnorm_fp32=True)
        keep_loss_fp32(network)
    else:
        network = TrainingWrapper(network, opt)
        network.set_train()

    if args.rank_save_ckpt_flag:
        # checkpoint save
        ckpt_max_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
        ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                       keep_checkpoint_max=ckpt_max_num)
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=args.outputs_dir,
                                  prefix='{}'.format(args.rank))
        cb_params = _InternalCallbackParam()
        cb_params.train_network = network
        cb_params.epoch_num = ckpt_max_num
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator()

    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        args.logger.info('iter[{}], shape{}'.format(i, input_shape[0]))

        images = Tensor(images)

        batch_y_true_0 = Tensor(data['bbox1'])
        batch_y_true_1 = Tensor(data['bbox2'])
        batch_y_true_2 = Tensor(data['bbox3'])
        batch_gt_box0 = Tensor(data['gt_box1'])
        batch_gt_box1 = Tensor(data['gt_box2'])
        batch_gt_box2 = Tensor(data['gt_box3'])

        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)
        loss = network(images, batch_y_true_0, batch_y_true_1, batch_y_true_2, batch_gt_box0, batch_gt_box1,
                       batch_gt_box2, input_shape)
        loss_meter.update(loss.asnumpy())

        if args.rank_save_ckpt_flag:
            # ckpt progress
            cb_params.cur_step_num = i + 1  # current step number
            cb_params.batch_num = i + 2
            ckpt_cb.step_end(run_context)

        if i % args.log_interval == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            fps = args.per_batch_size * (i - old_progress) * args.group_size / time_used
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if (i + 1) % args.steps_per_epoch == 0 and args.rank_save_ckpt_flag:
            cb_params.cur_epoch_num += 1

        if args.need_profiler:
            if i == 10:
                profiler.analyse()
                break

    args.logger.info('==========end training===============')
示例#12
0
def inception_v4_train():
    """
    Train Inceptionv4 in data parallelism
    """
    print('epoch_size: {} batch_size: {} class_num {}'.format(
        config.epoch_size, config.batch_size, config.num_classes))

    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    context.set_context(device_id=args.device_id)
    context.set_context(enable_graph_kernel=False)
    rank = 0
    if device_num > 1:
        init(backend_name='hccl')
        rank = get_rank()
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True,
            all_reduce_fusion_config=[200, 400])

    # create dataset
    train_dataset = create_dataset(dataset_path=args.dataset_path,
                                   do_train=True,
                                   repeat_num=1,
                                   batch_size=config.batch_size)
    train_step_size = train_dataset.get_dataset_size()

    # create model
    net = Inceptionv4(classes=config.num_classes)
    # loss
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    # learning rate
    lr = Tensor(
        generate_cosine_lr(steps_per_epoch=train_step_size,
                           total_epochs=config.epoch_size))

    decayed_params = []
    no_decayed_params = []
    for param in net.trainable_params():
        if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
            decayed_params.append(param)
        else:
            no_decayed_params.append(param)
    for param in net.trainable_params():
        if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
            param.set_data(
                initializer(XavierUniform(), param.data.shape,
                            param.data.dtype))
    group_params = [{
        'params': decayed_params,
        'weight_decay': config.weight_decay
    }, {
        'params': no_decayed_params
    }, {
        'order_params': net.trainable_params()
    }]

    opt = RMSProp(group_params,
                  lr,
                  decay=config.decay,
                  epsilon=config.epsilon,
                  weight_decay=config.weight_decay,
                  momentum=config.momentum,
                  loss_scale=config.loss_scale)

    if args.device_id == 0:
        print(lr)
        print(train_step_size)
    if args.resume:
        ckpt = load_checkpoint(args.resume)
        load_param_into_net(net, ckpt)

    loss_scale_manager = FixedLossScaleManager(config.loss_scale,
                                               drop_overflow_update=False)
    model = Model(net,
                  loss_fn=loss,
                  optimizer=opt,
                  metrics={'acc', 'top_1_accuracy', 'top_5_accuracy'},
                  loss_scale_manager=loss_scale_manager,
                  amp_level=config.amp_level)

    # define callbacks
    performance_cb = TimeMonitor(data_size=train_step_size)
    loss_cb = LossMonitor(per_print_times=train_step_size)
    ckp_save_step = config.save_checkpoint_epochs * train_step_size
    config_ck = CheckpointConfig(
        save_checkpoint_steps=ckp_save_step,
        keep_checkpoint_max=config.keep_checkpoint_max)
    ckpoint_cb = ModelCheckpoint(prefix=f"inceptionV4-train-rank{rank}",
                                 directory='ckpts_rank_' + str(rank),
                                 config=config_ck)
    callbacks = [performance_cb, loss_cb]
    if device_num > 1 and config.is_save_on_master:
        if args.device_id == 0:
            callbacks.append(ckpoint_cb)
    else:
        callbacks.append(ckpoint_cb)

    # train model
    model.train(config.epoch_size,
                train_dataset,
                callbacks=callbacks,
                dataset_sink_mode=True)
示例#13
0
def resnet50_train(args_opt):
    epoch_size = args_opt.epoch_size
    batch_size = 32
    class_num = 10
    loss_scale_num = 1024
    local_data_path = '/cache/data'

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        save_graphs=False)
    context.set_context(device_id=device_id)
    if device_num > 1:
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            mirror_mean=True)
        init()
        local_data_path = os.path.join(local_data_path, str(device_id))

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=local_data_path)

    # create dataset
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path,
                                   do_train=True,
                                   repeat_num=1,
                                   batch_size=batch_size)
    eval_dataset = create_dataset(dataset_path=local_data_path,
                                  do_train=False,
                                  repeat_num=1,
                                  batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # create model
    net = resnet50(class_num=class_num)
    # reduction='mean' means that apply reduction of mean to loss
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    lr = Tensor(
        get_lr(global_step=0,
               total_epochs=epoch_size,
               steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(),
                   lr,
                   momentum=0.9,
                   weight_decay=1e-4,
                   loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)

    # amp_level="O2" means that the hybrid precision of O2 mode is used for training
    # the whole network except that batchnoram will be cast into float16 format and dynamic loss scale will be used
    # 'keep_batchnorm_fp32 = False' means that use the float16 format
    model = Model(net,
                  amp_level="O2",
                  keep_batchnorm_fp32=False,
                  loss_fn=loss,
                  optimizer=opt,
                  loss_scale_manager=loss_scale,
                  metrics={'acc'})

    # define performance callback to show ips and loss callback to show loss for every epoch
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [performance_cb, loss_cb]

    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)
    if device_num == 1 or device_id == 0:
        print(f'Start run evaluation.')
        output = model.eval(eval_dataset)
        print(f'Evaluation result: {output}.')
示例#14
0
def train():
    """Train function."""

    args.outputs_dir = params['save_model_path']

    if args.group_size > 1:
        init()
        context.set_auto_parallel_context(
            device_num=get_group_size(),
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True)
        args.outputs_dir = os.path.join(args.outputs_dir,
                                        "ckpt_{}/".format(str(get_rank())))
        args.rank = get_rank()
    else:
        args.outputs_dir = os.path.join(args.outputs_dir, "ckpt_0/")
        args.rank = 0

    if args.group_size > 1:
        args.max_epoch = params["max_epoch_train_NP"]
        args.loss_scale = params['loss_scale'] / 2
        args.lr_steps = list(map(int, params["lr_steps_NP"].split(',')))
        params['train_type'] = params['train_type_NP']
        params['optimizer'] = params['optimizer_NP']
        params['group_params'] = params['group_params_NP']
    else:
        args.max_epoch = params["max_epoch_train"]
        args.loss_scale = params['loss_scale']
        args.lr_steps = list(map(int, params["lr_steps"].split(',')))

    # create network
    print('start create network')
    criterion = openpose_loss()
    criterion.add_flags_recursive(fp32=True)
    network = OpenPoseNet(vggpath=params['vgg_path'],
                          vgg_with_bn=params['vgg_with_bn'])
    if params["load_pretrain"]:
        print("load pretrain model:", params["pretrained_model_path"])
        load_model(network, params["pretrained_model_path"])
    train_net = BuildTrainNetwork(network, criterion)

    # create dataset
    if os.path.exists(args.jsonpath_train) and os.path.exists(args.imgpath_train) \
            and os.path.exists(args.maskpath_train):
        print('start create dataset')
    else:
        print('Error: wrong data path')
        return 0

    num_worker = 20 if args.group_size > 1 else 48
    de_dataset_train = create_dataset(args.jsonpath_train,
                                      args.imgpath_train,
                                      args.maskpath_train,
                                      batch_size=params['batch_size'],
                                      rank=args.rank,
                                      group_size=args.group_size,
                                      num_worker=num_worker,
                                      multiprocessing=True,
                                      shuffle=True,
                                      repeat_num=1)
    steps_per_epoch = de_dataset_train.get_dataset_size()
    print("steps_per_epoch: ", steps_per_epoch)

    # lr scheduler
    lr_stage, lr_base, lr_vgg = get_lr(params['lr'] * args.group_size,
                                       params['lr_gamma'],
                                       steps_per_epoch,
                                       args.max_epoch,
                                       args.lr_steps,
                                       args.group_size,
                                       lr_type=params['lr_type'],
                                       warmup_epoch=params['warmup_epoch'])

    # optimizer
    if params['group_params']:
        vgg19_base_params = list(
            filter(lambda x: 'base.vgg_base' in x.name,
                   train_net.trainable_params()))
        base_params = list(
            filter(lambda x: 'base.conv' in x.name,
                   train_net.trainable_params()))
        stages_params = list(
            filter(lambda x: 'base' not in x.name,
                   train_net.trainable_params()))

        group_params = [{
            'params': vgg19_base_params,
            'lr': lr_vgg
        }, {
            'params': base_params,
            'lr': lr_base
        }, {
            'params': stages_params,
            'lr': lr_stage
        }]

        if params['optimizer'] == "Momentum":
            opt = Momentum(group_params, learning_rate=lr_stage, momentum=0.9)
        elif params['optimizer'] == "Adam":
            opt = Adam(group_params)
        else:
            raise ValueError("optimizer not support.")
    else:
        if params['optimizer'] == "Momentum":
            opt = Momentum(train_net.trainable_params(),
                           learning_rate=lr_stage,
                           momentum=0.9)
        elif params['optimizer'] == "Adam":
            opt = Adam(train_net.trainable_params(), learning_rate=lr_stage)
        else:
            raise ValueError("optimizer not support.")

    # callback
    config_ck = CheckpointConfig(
        save_checkpoint_steps=params['ckpt_interval'],
        keep_checkpoint_max=params["keep_checkpoint_max"])
    ckpoint_cb = ModelCheckpoint(prefix='{}'.format(args.rank),
                                 directory=args.outputs_dir,
                                 config=config_ck)
    time_cb = TimeMonitor(data_size=de_dataset_train.get_dataset_size())
    if args.rank == 0:
        callback_list = [MyLossMonitor(), time_cb, ckpoint_cb]
    else:
        callback_list = [MyLossMonitor(), time_cb]

    # train
    if params['train_type'] == 'clip_grad':
        train_net = TrainOneStepWithClipGradientCell(train_net,
                                                     opt,
                                                     sens=args.loss_scale)
        train_net.set_train()
        model = Model(train_net)
    elif params['train_type'] == 'fix_loss_scale':
        loss_scale_manager = FixedLossScaleManager(args.loss_scale,
                                                   drop_overflow_update=False)
        train_net.set_train()
        model = Model(train_net,
                      optimizer=opt,
                      loss_scale_manager=loss_scale_manager)
    else:
        raise ValueError("Type {} is not support.".format(
            params['train_type']))

    print("============== Starting Training ==============")
    model.train(args.max_epoch,
                de_dataset_train,
                callbacks=callback_list,
                dataset_sink_mode=False)
    return 0
def train():
    """Train function."""
    args = parse_args()
    devid = int(os.getenv('DEVICE_ID', '0'))
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.device_target,
                        save_graphs=False,
                        device_id=devid)
    loss_meter = AverageMeter('loss')

    network = YOLOV4CspDarkNet53(is_training=True)
    # default is kaiming-normal
    default_recursive_init(network)

    if args.pretrained_backbone:
        pretrained_backbone_slice = args.pretrained_backbone.split('/')
        backbone_ckpt_file = pretrained_backbone_slice[
            len(pretrained_backbone_slice) - 1]
        local_backbone_ckpt_path = '/cache/' + backbone_ckpt_file
        # download backbone checkpoint
        mox.file.copy_parallel(src_url=args.pretrained_backbone,
                               dst_url=local_backbone_ckpt_path)
        args.pretrained_backbone = local_backbone_ckpt_path
    load_yolov4_params(args, network)

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config = ConfigYOLOV4CspDarkNet53()

    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor

    if args.training_shape:
        config.multi_scale = [convert_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    # data download
    local_data_path = '/cache/data'
    local_ckpt_path = '/cache/ckpt_file'
    print('Download data.')
    mox.file.copy_parallel(src_url=args.data_url, dst_url=local_data_path)

    ds, data_size = create_yolo_dataset(
        image_dir=os.path.join(local_data_path, 'images'),
        anno_path=os.path.join(local_data_path, 'annotation.json'),
        is_training=True,
        batch_size=args.per_batch_size,
        max_epoch=args.max_epoch,
        device_num=args.group_size,
        rank=args.rank,
        config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size /
                               args.group_size)

    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch * 10

    lr = get_lr(args)

    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)
    is_gpu = context.get_context("device_target") == "GPU"
    if is_gpu:
        loss_scale_value = 1.0
        loss_scale = FixedLossScaleManager(loss_scale_value,
                                           drop_overflow_update=False)
        network = amp.build_train_network(network,
                                          optimizer=opt,
                                          loss_scale_manager=loss_scale,
                                          level="O2",
                                          keep_batchnorm_fp32=False)
        keep_loss_fp32(network)
    else:
        network = TrainingWrapper(network, opt)
        network.set_train()

    # checkpoint save
    ckpt_max_num = 10
    ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                   keep_checkpoint_max=ckpt_max_num)
    ckpt_cb = ModelCheckpoint(config=ckpt_config,
                              directory=local_ckpt_path,
                              prefix='yolov4')
    cb_params = _InternalCallbackParam()
    cb_params.train_network = network
    cb_params.epoch_num = ckpt_max_num
    cb_params.cur_epoch_num = 1
    run_context = RunContext(cb_params)
    ckpt_cb.begin(run_context)

    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator(output_numpy=True, num_epochs=1)

    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        images = Tensor.from_numpy(images)

        batch_y_true_0 = Tensor.from_numpy(data['bbox1'])
        batch_y_true_1 = Tensor.from_numpy(data['bbox2'])
        batch_y_true_2 = Tensor.from_numpy(data['bbox3'])
        batch_gt_box0 = Tensor.from_numpy(data['gt_box1'])
        batch_gt_box1 = Tensor.from_numpy(data['gt_box2'])
        batch_gt_box2 = Tensor.from_numpy(data['gt_box3'])

        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)
        loss = network(images, batch_y_true_0, batch_y_true_1, batch_y_true_2,
                       batch_gt_box0, batch_gt_box1, batch_gt_box2,
                       input_shape)
        loss_meter.update(loss.asnumpy())

        # ckpt progress
        cb_params.cur_step_num = i + 1  # current step number
        cb_params.batch_num = i + 2
        ckpt_cb.step_end(run_context)

        if i % args.log_interval == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            fps = args.per_batch_size * (
                i - old_progress) * args.group_size / time_used
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(
                        epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if (i + 1) % args.steps_per_epoch == 0:
            cb_params.cur_epoch_num += 1

    args.logger.info('==========end training===============')

    # upload checkpoint files
    print('Upload checkpoint.')
    mox.file.copy_parallel(src_url=local_ckpt_path, dst_url=args.train_url)
示例#16
0
def train():
    args = parse_args()
    cfg = FCN8s_VOC2012_cfg
    device_num = int(os.environ.get("DEVICE_NUM", 1))
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        save_graphs=False,
                        device_target="Ascend",
                        device_id=args.device_id)
    # init multicards training
    args.rank = 0
    args.group_size = 1
    if device_num > 1:
        parallel_mode = ParallelMode.DATA_PARALLEL
        context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                          gradients_mean=True,
                                          device_num=device_num)
        init()
        args.rank = get_rank()
        args.group_size = get_group_size()

    # dataset
    dataset = data_generator.SegDataset(image_mean=cfg.image_mean,
                                        image_std=cfg.image_std,
                                        data_file=cfg.data_file,
                                        batch_size=cfg.batch_size,
                                        crop_size=cfg.crop_size,
                                        max_scale=cfg.max_scale,
                                        min_scale=cfg.min_scale,
                                        ignore_label=cfg.ignore_label,
                                        num_classes=cfg.num_classes,
                                        num_readers=2,
                                        num_parallel_calls=4,
                                        shard_id=args.rank,
                                        shard_num=args.group_size)
    dataset = dataset.get_dataset(repeat=1)

    net = FCN8s(n_class=cfg.num_classes)
    loss_ = loss.SoftmaxCrossEntropyLoss(cfg.num_classes, cfg.ignore_label)

    # load pretrained vgg16 parameters to init FCN8s
    if cfg.ckpt_vgg16:
        param_vgg = load_checkpoint(cfg.ckpt_vgg16)
        param_dict = {}
        for layer_id in range(1, 6):
            sub_layer_num = 2 if layer_id < 3 else 3
            for sub_layer_id in range(sub_layer_num):
                # conv param
                y_weight = 'conv{}.{}.weight'.format(layer_id,
                                                     3 * sub_layer_id)
                x_weight = 'vgg16_feature_extractor.conv{}_{}.0.weight'.format(
                    layer_id, sub_layer_id + 1)
                param_dict[y_weight] = param_vgg[x_weight]
                # BatchNorm param
                y_gamma = 'conv{}.{}.gamma'.format(layer_id,
                                                   3 * sub_layer_id + 1)
                y_beta = 'conv{}.{}.beta'.format(layer_id,
                                                 3 * sub_layer_id + 1)
                x_gamma = 'vgg16_feature_extractor.conv{}_{}.1.gamma'.format(
                    layer_id, sub_layer_id + 1)
                x_beta = 'vgg16_feature_extractor.conv{}_{}.1.beta'.format(
                    layer_id, sub_layer_id + 1)
                param_dict[y_gamma] = param_vgg[x_gamma]
                param_dict[y_beta] = param_vgg[x_beta]
        load_param_into_net(net, param_dict)
    # load pretrained FCN8s
    elif cfg.ckpt_pre_trained:
        param_dict = load_checkpoint(cfg.ckpt_pre_trained)
        load_param_into_net(net, param_dict)

    # optimizer
    iters_per_epoch = dataset.get_dataset_size()

    lr_scheduler = CosineAnnealingLR(cfg.base_lr,
                                     cfg.train_epochs,
                                     iters_per_epoch,
                                     cfg.train_epochs,
                                     warmup_epochs=0,
                                     eta_min=0)
    lr = Tensor(lr_scheduler.get_lr())

    # loss scale
    manager_loss_scale = FixedLossScaleManager(cfg.loss_scale,
                                               drop_overflow_update=False)

    optimizer = nn.Momentum(params=net.trainable_params(),
                            learning_rate=lr,
                            momentum=0.9,
                            weight_decay=0.0001,
                            loss_scale=cfg.loss_scale)

    model = Model(net,
                  loss_fn=loss_,
                  loss_scale_manager=manager_loss_scale,
                  optimizer=optimizer,
                  amp_level="O3")

    # callback for saving ckpts
    time_cb = TimeMonitor(data_size=iters_per_epoch)
    loss_cb = LossMonitor()
    cbs = [time_cb, loss_cb]

    if args.rank == 0:
        config_ck = CheckpointConfig(
            save_checkpoint_steps=cfg.save_steps,
            keep_checkpoint_max=cfg.keep_checkpoint_max)
        ckpoint_cb = ModelCheckpoint(prefix=cfg.model,
                                     directory=cfg.ckpt_dir,
                                     config=config_ck)
        cbs.append(ckpoint_cb)

    model.train(cfg.train_epochs, dataset, callbacks=cbs)
示例#17
0
文件: train.py 项目: yrpang/mindspore
def main():
    """Main entrance for training"""
    args = parser.parse_args()
    print(sys.argv)
    devid, args.rank_id, args.rank_size = 0, 0, 1

    context.set_context(mode=context.GRAPH_MODE)

    if args.distributed:
        if args.GPU:
            init("nccl")
            context.set_context(device_target='GPU')
        else:
            init()
            devid = int(os.getenv('DEVICE_ID'))
            context.set_context(device_target='Ascend',
                                device_id=devid,
                                reserve_class_name_in_scope=False)
        context.reset_auto_parallel_context()
        args.rank_id = get_rank()
        args.rank_size = get_group_size()
        context.set_auto_parallel_context(
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True,
            device_num=args.rank_size)
    else:
        if args.GPU:
            context.set_context(device_target='GPU')

    is_master = not args.distributed or (args.rank_id == 0)

    # parse model argument
    assert args.model.startswith(
        "tinynet"), "Only Tinynet models are supported."
    _, sub_name = args.model.split("_")
    net = tinynet(sub_model=sub_name,
                  num_classes=args.num_classes,
                  drop_rate=args.drop,
                  drop_connect_rate=args.drop_connect,
                  global_pool="avg",
                  bn_tf=args.bn_tf,
                  bn_momentum=args.bn_momentum,
                  bn_eps=args.bn_eps)

    if is_master:
        print("Total number of parameters:", count_params(net))
    # input image size of the network
    input_size = net.default_cfg['input_size'][1]

    train_dataset = val_dataset = None
    train_data_url = os.path.join(args.data_path, 'train')
    val_data_url = os.path.join(args.data_path, 'val')
    val_dataset = create_dataset_val(args.batch_size,
                                     val_data_url,
                                     workers=args.workers,
                                     distributed=False,
                                     input_size=input_size)

    if args.train:
        train_dataset = create_dataset(args.batch_size,
                                       train_data_url,
                                       workers=args.workers,
                                       distributed=args.distributed,
                                       input_size=input_size)
        batches_per_epoch = train_dataset.get_dataset_size()

    loss = LabelSmoothingCrossEntropy(smooth_factor=args.smoothing,
                                      num_classes=args.num_classes)
    time_cb = TimeMonitor(data_size=batches_per_epoch)
    loss_scale_manager = FixedLossScaleManager(args.loss_scale,
                                               drop_overflow_update=False)

    lr_array = get_lr(base_lr=args.lr,
                      total_epochs=args.epochs,
                      steps_per_epoch=batches_per_epoch,
                      decay_epochs=args.decay_epochs,
                      decay_rate=args.decay_rate,
                      warmup_epochs=args.warmup_epochs,
                      warmup_lr_init=args.warmup_lr,
                      global_epoch=0)
    lr = Tensor(lr_array)

    loss_cb = LossMonitor(lr_array,
                          args.epochs,
                          per_print_times=args.per_print_times,
                          start_epoch=0)

    param_group = add_weight_decay(net, weight_decay=args.weight_decay)

    if args.opt == 'sgd':
        if is_master:
            print('Using SGD optimizer')
        optimizer = SGD(param_group,
                        learning_rate=lr,
                        momentum=args.momentum,
                        weight_decay=args.weight_decay,
                        loss_scale=args.loss_scale)

    elif args.opt == 'rmsprop':
        if is_master:
            print('Using rmsprop optimizer')
        optimizer = RMSProp(param_group,
                            learning_rate=lr,
                            decay=0.9,
                            weight_decay=args.weight_decay,
                            momentum=args.momentum,
                            epsilon=args.opt_eps,
                            loss_scale=args.loss_scale)

    loss.add_flags_recursive(fp32=True, fp16=False)
    eval_metrics = {
        'Validation-Loss': Loss(),
        'Top1-Acc': Top1CategoricalAccuracy(),
        'Top5-Acc': Top5CategoricalAccuracy()
    }

    if args.ckpt:
        ckpt = load_checkpoint(args.ckpt)
        load_param_into_net(net, ckpt)
        net.set_train(False)

    model = Model(net,
                  loss,
                  optimizer,
                  metrics=eval_metrics,
                  loss_scale_manager=loss_scale_manager,
                  amp_level=args.amp_level)

    net_ema = copy.deepcopy(net)
    net_ema.set_train(False)
    assert args.ema_decay > 0, "EMA should be used in tinynet training."

    ema_cb = EmaEvalCallBack(network=net,
                             ema_network=net_ema,
                             loss_fn=loss,
                             eval_dataset=val_dataset,
                             decay=args.ema_decay,
                             save_epoch=args.ckpt_save_epoch,
                             dataset_sink_mode=args.dataset_sink,
                             start_epoch=0)

    callbacks = [loss_cb, ema_cb, time_cb] if is_master else []

    if is_master:
        print("Training on " + args.model + " with " + str(args.num_classes) +
              " classes")

    model.train(args.epochs,
                train_dataset,
                callbacks=callbacks,
                dataset_sink_mode=args.dataset_sink)
示例#18
0
def train():
    args = parse_args()

    # init multicards training
    if args.is_distributed:
        init()
        args.rank = get_rank()
        args.group_size = get_group_size()

        parallel_mode = ParallelMode.DATA_PARALLEL
        context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                          gradients_mean=True,
                                          device_num=args.group_size)

    # dataset
    dataset = data_generator.SegDataset(image_mean=args.image_mean,
                                        image_std=args.image_std,
                                        data_file=args.data_file,
                                        batch_size=args.batch_size,
                                        crop_size=args.crop_size,
                                        max_scale=args.max_scale,
                                        min_scale=args.min_scale,
                                        ignore_label=args.ignore_label,
                                        num_classes=args.num_classes,
                                        num_readers=2,
                                        num_parallel_calls=4,
                                        shard_id=args.rank,
                                        shard_num=args.group_size)
    dataset = dataset.get_dataset(repeat=1)

    # network
    if args.model == 'deeplab_v3_s16':
        network = net_factory.nets_map[args.model]('train', args.num_classes,
                                                   16, args.freeze_bn)
    elif args.model == 'deeplab_v3_s8':
        network = net_factory.nets_map[args.model]('train', args.num_classes,
                                                   8, args.freeze_bn)
    else:
        raise NotImplementedError('model [{:s}] not recognized'.format(
            args.model))

    # loss
    loss_ = loss.SoftmaxCrossEntropyLoss(args.num_classes, args.ignore_label)
    loss_.add_flags_recursive(fp32=True)
    train_net = BuildTrainNetwork(network, loss_)

    # load pretrained model
    if args.ckpt_pre_trained:
        param_dict = load_checkpoint(args.ckpt_pre_trained)
        load_param_into_net(train_net, param_dict)

    # optimizer
    iters_per_epoch = dataset.get_dataset_size()
    total_train_steps = iters_per_epoch * args.train_epochs
    if args.lr_type == 'cos':
        lr_iter = learning_rates.cosine_lr(args.base_lr, total_train_steps,
                                           total_train_steps)
    elif args.lr_type == 'poly':
        lr_iter = learning_rates.poly_lr(args.base_lr,
                                         total_train_steps,
                                         total_train_steps,
                                         end_lr=0.0,
                                         power=0.9)
    elif args.lr_type == 'exp':
        lr_iter = learning_rates.exponential_lr(args.base_lr,
                                                args.lr_decay_step,
                                                args.lr_decay_rate,
                                                total_train_steps,
                                                staircase=True)
    else:
        raise ValueError('unknown learning rate type')
    opt = nn.Momentum(params=train_net.trainable_params(),
                      learning_rate=lr_iter,
                      momentum=0.9,
                      weight_decay=0.0001,
                      loss_scale=args.loss_scale)

    # loss scale
    manager_loss_scale = FixedLossScaleManager(args.loss_scale,
                                               drop_overflow_update=False)
    model = Model(train_net,
                  optimizer=opt,
                  amp_level="O3",
                  loss_scale_manager=manager_loss_scale)

    # callback for saving ckpts
    time_cb = TimeMonitor(data_size=iters_per_epoch)
    loss_cb = LossMonitor()
    cbs = [time_cb, loss_cb]

    if args.rank == 0:
        config_ck = CheckpointConfig(
            save_checkpoint_steps=args.save_steps,
            keep_checkpoint_max=args.keep_checkpoint_max)
        ckpoint_cb = ModelCheckpoint(prefix=args.model,
                                     directory=args.train_dir,
                                     config=config_ck)
        cbs.append(ckpoint_cb)

    model.train(args.train_epochs, dataset, callbacks=cbs)
示例#19
0
def train_process_thor(q, device_id, epoch_size, device_num, enable_hccl):
    os.system("mkdir " + str(device_id))
    os.chdir(str(device_id))
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        save_graphs=False)
    context.set_context(device_id=device_id)
    os.environ['MINDSPORE_HCCL_CONFIG_PATH'] = MINDSPORE_HCCL_CONFIG_PATH_2
    os.environ['RANK_ID'] = str(device_id - 4)
    os.environ['RANK_SIZE'] = str(device_num)
    if enable_hccl:
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            mirror_mean=True,
            parameter_broadcast=True)
        auto_parallel_context().set_all_reduce_fusion_split_indices(
            [107], "hccl_world_groupsum1")
        auto_parallel_context().set_all_reduce_fusion_split_indices(
            [27], "hccl_world_groupsum2")
        auto_parallel_context().set_all_reduce_fusion_split_indices(
            [27], "hccl_world_groupsum3")
        auto_parallel_context().set_all_reduce_fusion_split_indices(
            [27], "hccl_world_groupsum4")
        auto_parallel_context().set_all_reduce_fusion_split_indices(
            [27], "hccl_world_groupsum5")
        init()

    # network
    damping = get_model_damping(0, 0.03, 0.87, 50, 5004)
    net = resnet50_thor(class_num=thor_config.class_num,
                        damping=damping,
                        loss_scale=thor_config.loss_scale,
                        frequency=thor_config.frequency)

    # evaluation network
    dist_eval_network = ClassifyCorrectCell(net)

    if not thor_config.label_smooth:
        thor_config.label_smooth_factor = 0.0

    # loss
    loss = nn.SoftmaxCrossEntropyWithLogits(
        sparse=True,
        reduction="mean",
        smooth_factor=thor_config.label_smooth_factor,
        num_classes=thor_config.class_num)

    # train dataset
    dataset = create_dataset(dataset_path=dataset_path,
                             do_train=True,
                             repeat_num=epoch_size,
                             batch_size=thor_config.batch_size)

    step_size = dataset.get_dataset_size()
    eval_interval = thor_config.eval_interval

    # evalutation dataset
    eval_dataset = create_dataset(dataset_path=eval_path,
                                  do_train=False,
                                  repeat_num=epoch_size,
                                  batch_size=thor_config.eval_batch_size)

    # loss scale
    loss_scale = FixedLossScaleManager(thor_config.loss_scale,
                                       drop_overflow_update=False)

    # learning rate
    lr = Tensor(get_model_lr(0, 0.045, 6, 70, 5004))

    # optimizer
    opt = THOR(filter(lambda x: x.requires_grad,
                      net.get_parameters()), lr, thor_config.momentum,
               filter(lambda x: 'matrix_A' in x.name, net.get_parameters()),
               filter(lambda x: 'matrix_G' in x.name, net.get_parameters()),
               filter(lambda x: 'A_inv_max' in x.name, net.get_parameters()),
               filter(lambda x: 'G_inv_max' in x.name, net.get_parameters()),
               thor_config.weight_decay, thor_config.loss_scale)

    # model
    model = THOR_Model(net,
                       loss_fn=loss,
                       optimizer=opt,
                       loss_scale_manager=loss_scale,
                       amp_level="O2",
                       keep_batchnorm_fp32=False,
                       metrics={
                           'acc':
                           DistAccuracy(batch_size=thor_config.eval_batch_size,
                                        device_num=device_num)
                       },
                       eval_network=dist_eval_network,
                       frequency=thor_config.frequency)

    # model init
    print("init_start", device_id)
    model.init(dataset, eval_dataset)
    print("init_stop", device_id)

    # callbacks
    loss_cb = LossGet(1, step_size)

    # train and eval
    acc = 0.0
    time_cost = 0.0
    print("run_start", device_id)
    for epoch_idx in range(0, int(epoch_size / eval_interval)):
        model.train(eval_interval, dataset, callbacks=loss_cb)
        eval_start = time.time()
        output = model.eval(eval_dataset)
        eval_cost = (time.time() - eval_start) * 1000
        acc = float(output["acc"])
        time_cost = loss_cb.get_per_step_time()
        loss = loss_cb.get_loss()
        print(
            "the {} epoch's resnet result:\n "
            "device{}, training loss {}, acc {}, "
            "training per step cost {:.2f} ms, eval cost {:.2f} ms, total_cost {:.2f} ms"
            .format(epoch_idx, device_id, loss, acc, time_cost, eval_cost,
                    time_cost * step_size + eval_cost))
    q.put({'acc': acc, 'cost': time_cost})
示例#20
0
def _build_training_pipeline(config: TransformerConfig,
                             pre_training_dataset=None,
                             fine_tune_dataset=None,
                             test_dataset=None,
                             platform="Ascend"):
    """
    Build training pipeline.

    Args:
        config (TransformerConfig): Config of mass model.
        pre_training_dataset (Dataset): Pre-training dataset.
        fine_tune_dataset (Dataset): Fine-tune dataset.
        test_dataset (Dataset): Test dataset.
    """
    net_with_loss = TransformerNetworkWithLoss(config, is_training=True)
    net_with_loss.init_parameters_data()

    if config.existed_ckpt:
        if config.existed_ckpt.endswith(".npz"):
            weights = np.load(config.existed_ckpt)
        else:
            weights = load_checkpoint(config.existed_ckpt)
        for param in net_with_loss.trainable_params():
            weights_name = param.name
            if weights_name not in weights:
                raise ValueError(
                    f"Param {weights_name} is not found in ckpt file.")

            if isinstance(weights[weights_name], Parameter):
                param.set_data(weights[weights_name].data)
            elif isinstance(weights[weights_name], Tensor):
                param.set_data(
                    Tensor(weights[weights_name].asnumpy(), config.dtype))
            elif isinstance(weights[weights_name], np.ndarray):
                param.set_data(Tensor(weights[weights_name], config.dtype))
            else:
                param.set_data(weights[weights_name])
    else:
        for param in net_with_loss.trainable_params():
            name = param.name
            value = param.data
            if isinstance(value, Tensor):
                if name.endswith(".gamma"):
                    param.set_data(one_weight(value.asnumpy().shape))
                elif name.endswith(".beta") or name.endswith(".bias"):
                    param.set_data(zero_weight(value.asnumpy().shape))
                else:
                    param.set_data(weight_variable(value.asnumpy().shape))

    dataset = pre_training_dataset if pre_training_dataset is not None \
        else fine_tune_dataset

    if dataset is None:
        raise ValueError(
            "pre-training dataset or fine-tuning dataset must be provided one."
        )

    update_steps = config.epochs * dataset.get_dataset_size()
    if config.lr_scheduler == "isr":
        lr = Tensor(square_root_schedule(
            lr=config.lr,
            update_num=update_steps,
            decay_start_step=config.decay_start_step,
            warmup_steps=config.warmup_steps,
            min_lr=config.min_lr),
                    dtype=mstype.float32)
    elif config.lr_scheduler == "poly":
        lr = Tensor(polynomial_decay_scheduler(
            lr=config.lr,
            min_lr=config.min_lr,
            decay_steps=config.decay_steps,
            total_update_num=update_steps,
            warmup_steps=config.warmup_steps,
            power=config.poly_lr_scheduler_power),
                    dtype=mstype.float32)
    else:
        lr = config.lr

    if config.optimizer.lower() == "adam":
        optimizer = Adam(net_with_loss.trainable_params(),
                         lr,
                         beta1=0.9,
                         beta2=0.98)
    elif config.optimizer.lower() == "lamb":
        lr = BertLearningRate(decay_steps=12000,
                              learning_rate=config.lr,
                              end_learning_rate=config.min_lr,
                              power=10.0,
                              warmup_steps=config.warmup_steps)
        decay_params = list(
            filter(
                lambda x: 'layernorm' not in x.name.lower() and 'bias' not in x
                .name.lower(), net_with_loss.trainable_params()))
        other_params = list(
            filter(
                lambda x: 'layernorm' in x.name.lower() or 'bias' in x.name.
                lower(), net_with_loss.trainable_params()))
        group_params = [{
            'params': decay_params,
            'weight_decay': 0.01
        }, {
            'params': other_params
        }]

        optimizer = Lamb(group_params, lr, eps=1e-6)
    elif config.optimizer.lower() == "momentum":
        optimizer = Momentum(net_with_loss.trainable_params(),
                             lr,
                             momentum=0.9)
    else:
        raise ValueError(f"optimizer only support `adam` and `momentum` now.")

    # loss scale.
    if config.loss_scale_mode == "dynamic":
        scale_manager = DynamicLossScaleManager(
            init_loss_scale=config.init_loss_scale,
            scale_factor=config.loss_scale_factor,
            scale_window=config.scale_window)
    else:
        scale_manager = FixedLossScaleManager(
            loss_scale=config.init_loss_scale, drop_overflow_update=True)
    net_with_grads = TransformerTrainOneStepWithLossScaleCell(
        network=net_with_loss,
        optimizer=optimizer,
        scale_update_cell=scale_manager.get_update_cell())
    net_with_grads.set_train(True)
    model = Model(net_with_grads)
    ckpt_config = CheckpointConfig(
        save_checkpoint_steps=config.save_ckpt_steps,
        keep_checkpoint_max=config.keep_ckpt_max)

    rank_size = os.getenv('RANK_SIZE')
    callbacks = []
    if rank_size is not None and int(rank_size) > 1:
        loss_monitor = LossCallBack(config, rank_id=MultiAscend.get_rank())
        callbacks.append(loss_monitor)
        if MultiAscend.get_rank() % 8 == 0:
            ckpt_callback = ModelCheckpoint(prefix=config.ckpt_prefix,
                                            directory=os.path.join(
                                                config.ckpt_path,
                                                'ckpt_{}'.format(
                                                    MultiAscend.get_rank())),
                                            config=ckpt_config)
            callbacks.append(ckpt_callback)

    if rank_size is None or int(rank_size) == 1:
        ckpt_callback = ModelCheckpoint(
            prefix=config.ckpt_prefix,
            directory=os.path.join(config.ckpt_path,
                                   'ckpt_{}'.format(os.getenv('DEVICE_ID'))),
            config=ckpt_config)
        loss_monitor = LossCallBack(config, rank_id=os.getenv('DEVICE_ID'))
        callbacks.append(loss_monitor)
        callbacks.append(ckpt_callback)

    print(f" | ALL SET, PREPARE TO TRAIN.")
    _train(model=model,
           config=config,
           pre_training_dataset=pre_training_dataset,
           fine_tune_dataset=fine_tune_dataset,
           test_dataset=test_dataset,
           callbacks=callbacks)
示例#21
0
def train(net, ckpt_save_dir, target):  # pylint: disable=too-many-locals
    """ train the network"""
    # create dataset
    train_dataset = create_dataset(dataset_path=ARGS_OPT.train_dataset,
                                   do_train=True,
                                   repeat_num=1,
                                   batch_size=config.batch_size,
                                   target=target)  # pylint: disable=no-member
    step_size = train_dataset.get_dataset_size()
    # init lr
    learning_rate = get_lr(
        lr_init=config.lr_init,
        lr_end=config.lr_end,  # pylint: disable=no-member
        lr_max=config.lr_max,
        warmup_epochs=config.warmup_epochs,  # pylint: disable=no-member
        total_epochs=config.epoch_size,  # pylint: disable=no-member
        steps_per_epoch=step_size,
        lr_decay_mode=config.lr_decay_mode)  # pylint: disable=no-member
    learning_rate = Tensor(learning_rate)

    # define opt
    decayed_params = []
    no_decayed_params = []
    for param in net.trainable_params():
        if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
            decayed_params.append(param)
        else:
            no_decayed_params.append(param)

    group_params = [
        {
            'params': decayed_params,
            'weight_decay': config.weight_decay
        },  # pylint: disable=no-member
        {
            'params': no_decayed_params
        },
        {
            'order_params': net.trainable_params()
        }
    ]
    opt = Momentum(group_params,
                   learning_rate,
                   config.momentum,
                   loss_scale=config.loss_scale)  # pylint: disable=no-member

    # define loss, model
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    loss_scale = FixedLossScaleManager(config.loss_scale,
                                       drop_overflow_update=False)  # pylint: disable=no-member
    model = Model(net,
                  loss_fn=loss,
                  optimizer=opt,
                  loss_scale_manager=loss_scale,
                  metrics={'top_1_accuracy', 'top_5_accuracy'},
                  amp_level="O2",
                  keep_batchnorm_fp32=False)

    # define callbacks
    time_cb = TimeMonitor(data_size=step_size)
    loss_cb = LossMonitor()
    callbacks = [time_cb, loss_cb]
    if config.save_checkpoint:  # pylint: disable=no-member
        config_ck = CheckpointConfig(
            save_checkpoint_steps=config.save_checkpoint_epochs * step_size,  # pylint: disable=no-member
            keep_checkpoint_max=config.keep_checkpoint_max)  # pylint: disable=no-member
        ckpt_cb = ModelCheckpoint(prefix="resnet",
                                  directory=ckpt_save_dir,
                                  config=config_ck)
        callbacks += [ckpt_cb]

    # define the eval call back
    epochs_per_eval = {"epoch": [], "acc": []}
    if not ARGS_OPT.run_distribute:
        eval_dataset = create_dataset(
            dataset_path=ARGS_OPT.eval_dataset,
            do_train=False,
            batch_size=config.batch_size,  # pylint: disable=no-member
            target=target)
        eval_cb = EvalCallBack(model, eval_dataset, 1, epochs_per_eval)
        callbacks.append(eval_cb)

    # start training the qunat aware training network
    model.train(
        config.epoch_size,
        train_dataset,
        callbacks=callbacks,  # pylint: disable=no-member
        sink_size=train_dataset.get_dataset_size(),
        dataset_sink_mode=False)
    if not ARGS_OPT.run_distribute:
        print(
            "***************** evaluation results of training process ***************** "
        )
        print(epochs_per_eval)
示例#22
0
def train(cloud_args=None):
    """training process"""
    args = parse_args(cloud_args)
    context.set_context(mode=context.GRAPH_MODE,
                        enable_auto_mixed_precision=True,
                        device_target=args.platform,
                        save_graphs=False)
    if os.getenv('DEVICE_ID', "not_set").isdigit():
        context.set_context(device_id=int(os.getenv('DEVICE_ID')))

    # init distributed
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                          device_num=args.group_size,
                                          gradients_mean=True)
    # dataloader
    de_dataset = classification_dataset(args.data_dir,
                                        args.image_size,
                                        args.per_batch_size,
                                        1,
                                        args.rank,
                                        args.group_size,
                                        num_parallel_workers=8)
    de_dataset.map_model = 4  # !!!important
    args.steps_per_epoch = de_dataset.get_dataset_size()

    args.logger.save_args(args)

    # network
    args.logger.important_info('start create network')
    # get network and init
    network = get_network(args.backbone,
                          num_classes=args.num_classes,
                          platform=args.platform)
    if network is None:
        raise NotImplementedError('not implement {}'.format(args.backbone))

    load_pretrain_model(args.pretrained, network, args)

    # lr scheduler
    lr = get_lr(args)

    # optimizer
    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)

    # loss
    if not args.label_smooth:
        args.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=args.label_smooth_factor,
                        num_classes=args.num_classes)

    if args.is_dynamic_loss_scale == 1:
        loss_scale_manager = DynamicLossScaleManager(init_loss_scale=65536,
                                                     scale_factor=2,
                                                     scale_window=2000)
    else:
        loss_scale_manager = FixedLossScaleManager(args.loss_scale,
                                                   drop_overflow_update=False)

    if args.platform == "Ascend":
        model = Model(network,
                      loss_fn=loss,
                      optimizer=opt,
                      loss_scale_manager=loss_scale_manager,
                      metrics={'acc'},
                      amp_level="O3")
    else:
        model = Model(network,
                      loss_fn=loss,
                      optimizer=opt,
                      loss_scale_manager=loss_scale_manager,
                      metrics={'acc'},
                      amp_level="O2")

    # checkpoint save
    progress_cb = ProgressMonitor(args)
    callbacks = [
        progress_cb,
    ]
    if args.rank_save_ckpt_flag:
        ckpt_config = CheckpointConfig(
            save_checkpoint_steps=args.ckpt_interval * args.steps_per_epoch,
            keep_checkpoint_max=args.ckpt_save_max)
        save_ckpt_path = os.path.join(args.outputs_dir,
                                      'ckpt_' + str(args.rank) + '/')
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=save_ckpt_path,
                                  prefix='{}'.format(args.rank))
        callbacks.append(ckpt_cb)

    model.train(args.max_epoch,
                de_dataset,
                callbacks=callbacks,
                dataset_sink_mode=True)
示例#23
0
文件: train.py 项目: yrpang/mindspore
def dpn_train(args):
    # init context
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend", save_graphs=False, device_id=device_id)
    # init distributed
    if args.is_distributed:
        init()
        args.rank = get_rank()
        args.group_size = get_group_size()
        context.set_auto_parallel_context(device_num=args.group_size, parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True)

    # select for master rank save ckpt or all rank save, compatible for model parallel
    args.rank_save_ckpt_flag = 0
    if args.is_save_on_master:
        if args.rank == 0:
            args.rank_save_ckpt_flag = 1
    else:
        args.rank_save_ckpt_flag = 1
    # create dataset
    args.train_dir = os.path.join(args.data_dir, 'train')
    args.eval_dir = os.path.join(args.data_dir, 'val')
    train_dataset = classification_dataset(args.train_dir,
                                           image_size=args.image_size,
                                           per_batch_size=args.batch_size,
                                           max_epoch=1,
                                           num_parallel_workers=args.num_parallel_workers,
                                           shuffle=True,
                                           rank=args.rank,
                                           group_size=args.group_size)
    if args.eval_each_epoch:
        print("create eval_dataset")
        eval_dataset = classification_dataset(args.eval_dir,
                                              image_size=args.image_size,
                                              per_batch_size=args.batch_size,
                                              max_epoch=1,
                                              num_parallel_workers=args.num_parallel_workers,
                                              shuffle=False,
                                              rank=args.rank,
                                              group_size=args.group_size,
                                              mode='eval')
    train_step_size = train_dataset.get_dataset_size()

    # choose net
    net = dpns[args.backbone](num_classes=args.num_classes)

    # load checkpoint
    if os.path.isfile(args.pretrained):
        print("load ckpt")
        load_param_into_net(net, load_checkpoint(args.pretrained))
    # learing rate schedule
    if args.lr_schedule == 'drop':
        print("lr_schedule:drop")
        lr = Tensor(get_lr_drop(global_step=args.global_step,
                                total_epochs=args.epoch_size,
                                steps_per_epoch=train_step_size,
                                lr_init=args.lr_init,
                                factor=args.factor))
    elif args.lr_schedule == 'warmup':
        print("lr_schedule:warmup")
        lr = Tensor(get_lr_warmup(global_step=args.global_step,
                                  total_epochs=args.epoch_size,
                                  steps_per_epoch=train_step_size,
                                  lr_init=args.lr_init,
                                  lr_max=args.lr_max,
                                  warmup_epochs=args.warmup_epochs))

    # optimizer
    opt = SGD(net.trainable_params(),
              lr,
              momentum=args.momentum,
              weight_decay=args.weight_decay,
              loss_scale=args.loss_scale_num)
    # loss scale
    loss_scale = FixedLossScaleManager(args.loss_scale_num, False)
    # loss function
    if args.dataset == "imagenet-1K":
        print("Use SoftmaxCrossEntropyWithLogits")
        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    else:
        if not args.label_smooth:
            args.label_smooth_factor = 0.0
        print("Use Label_smooth CrossEntropy")
        loss = CrossEntropy(smooth_factor=args.label_smooth_factor, num_classes=args.num_classes)
    # create model
    model = Model(net, amp_level="O2",
                  keep_batchnorm_fp32=False,
                  loss_fn=loss,
                  optimizer=opt,
                  loss_scale_manager=loss_scale,
                  metrics={'top_1_accuracy', 'top_5_accuracy'})

    # loss/time monitor & ckpt save callback
    loss_cb = LossMonitor()
    time_cb = TimeMonitor(data_size=train_step_size)
    cb = [loss_cb, time_cb]
    if args.rank_save_ckpt_flag:
        if args.eval_each_epoch:
            save_cb = SaveCallback(model, eval_dataset, args.ckpt_path)
            cb += [save_cb]
        else:
            config_ck = CheckpointConfig(save_checkpoint_steps=train_step_size,
                                         keep_checkpoint_max=args.keep_checkpoint_max)
            ckpoint_cb = ModelCheckpoint(prefix="dpn", directory=args.ckpt_path, config=config_ck)
            cb.append(ckpoint_cb)
    # train model
    model.train(args.epoch_size, train_dataset, callbacks=cb)
示例#24
0
文件: train.py 项目: yrpang/mindspore
def train(cloud_args=None):
    """training process"""
    args = parse_args(cloud_args)

    context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=True,
                        device_target=args.device_target, save_graphs=False)

    if args.device_target == 'Ascend':
        devid = int(os.getenv('DEVICE_ID'))
        context.set_context(device_id=devid)

    # init distributed
    if args.is_distributed:
        init()
        args.rank = get_rank()
        args.group_size = get_group_size()

    if args.is_dynamic_loss_scale == 1:
        args.loss_scale = 1  # for dynamic loss scale can not set loss scale in momentum opt

    # select for master rank save ckpt or all rank save, compatible for model parallel
    args.rank_save_ckpt_flag = 0
    if args.is_save_on_master:
        if args.rank == 0:
            args.rank_save_ckpt_flag = 1
    else:
        args.rank_save_ckpt_flag = 1

    # logger
    args.outputs_dir = os.path.join(args.ckpt_path,
                                    datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    args.logger = get_logger(args.outputs_dir, args.rank)

    if args.net == "densenet100":
        from src.network.densenet import DenseNet100 as DenseNet
    else:
        from src.network.densenet import DenseNet121 as DenseNet

    if args.dataset == "cifar10":
        from src.datasets import classification_dataset_cifar10 as classification_dataset
    else:
        from src.datasets import classification_dataset_imagenet as classification_dataset

    # dataloader
    de_dataset = classification_dataset(args.data_dir, args.image_size, args.per_batch_size, args.max_epoch,
                                        args.rank, args.group_size)
    de_dataset.map_model = 4
    args.steps_per_epoch = de_dataset.get_dataset_size()

    args.logger.save_args(args)

    # network
    args.logger.important_info('start create network')
    # get network and init
    network = DenseNet(args.num_classes)
    # loss
    if not args.label_smooth:
        args.label_smooth_factor = 0.0
    criterion = CrossEntropy(smooth_factor=args.label_smooth_factor, num_classes=args.num_classes)

    # load pretrain model
    if os.path.isfile(args.pretrained):
        param_dict = load_checkpoint(args.pretrained)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
            else:
                param_dict_new[key] = values
        load_param_into_net(network, param_dict_new)
        args.logger.info('load model {} success'.format(args.pretrained))

    # lr scheduler
    lr_scheduler = get_lr_scheduler(args)
    lr_schedule = lr_scheduler.get_lr()

    # optimizer
    opt = Momentum(params=get_param_groups(network), learning_rate=Tensor(lr_schedule),
                   momentum=args.momentum, weight_decay=args.weight_decay, loss_scale=args.loss_scale)

    # mixed precision training
    criterion.add_flags_recursive(fp32=True)

    # package training process, adjust lr + forward + backward + optimizer
    train_net = BuildTrainNetwork(network, criterion)
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
    else:
        parallel_mode = ParallelMode.STAND_ALONE
    if args.is_dynamic_loss_scale == 1:
        loss_scale_manager = DynamicLossScaleManager(init_loss_scale=65536, scale_factor=2, scale_window=2000)
    else:
        loss_scale_manager = FixedLossScaleManager(args.loss_scale, drop_overflow_update=False)

    context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=args.group_size,
                                      gradients_mean=True)

    if args.device_target == 'Ascend':
        model = Model(train_net, optimizer=opt, metrics=None, loss_scale_manager=loss_scale_manager, amp_level="O3")
    elif args.device_target == 'GPU':
        model = Model(train_net, optimizer=opt, metrics=None, loss_scale_manager=loss_scale_manager, amp_level="O0")
    elif args.device_target == 'CPU':
        model = Model(train_net, optimizer=opt, metrics=None, loss_scale_manager=loss_scale_manager, amp_level="O0")
    else:
        raise ValueError("Unsupported device target.")

    # checkpoint save
    progress_cb = ProgressMonitor(args)
    callbacks = [progress_cb,]
    if args.rank_save_ckpt_flag:
        ckpt_max_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
        ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                       keep_checkpoint_max=ckpt_max_num)
        ckpt_cb = ModelCheckpoint(config=ckpt_config, directory=args.outputs_dir,
                                  prefix='{}'.format(args.rank))
        callbacks.append(ckpt_cb)

    model.train(args.max_epoch, de_dataset, callbacks=callbacks)
示例#25
0
        else:
            no_decayed_params.append(param)

    group_params = [{
        'params': decayed_params,
        'weight_decay': 1e-4
    }, {
        'params': no_decayed_params
    }, {
        'order_params': net.trainable_params()
    }]
    # define loss, model
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr,
                   0.9, 1e-4, 1024)
    loss_scale = FixedLossScaleManager(1024, drop_overflow_update=False)
    model = Model(net,
                  loss_fn=loss,
                  optimizer=opt,
                  loss_scale_manager=loss_scale,
                  metrics={'acc'})
    # Mixed precision
    if compute_type == "fp16":
        model = Model(net,
                      loss_fn=loss,
                      optimizer=opt,
                      loss_scale_manager=loss_scale,
                      metrics={'acc'},
                      amp_level="O2",
                      keep_batchnorm_fp32=False)
    # define callbacks
示例#26
0
def train():
    """Train function."""
    args = parse_args()
    devid = int(os.getenv('DEVICE_ID', '0'))
    context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=True,
                        device_target=args.device_target, save_graphs=True, device_id=devid)
    if args.need_profiler:
        from mindspore.profiler.profiling import Profiler
        profiler = Profiler(output_path=args.outputs_dir, is_detail=True, is_show_op_path=True)

    loss_meter = AverageMeter('loss')

    context.reset_auto_parallel_context()
    parallel_mode = ParallelMode.STAND_ALONE
    degree = 1
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
        degree = get_group_size()
    context.set_auto_parallel_context(parallel_mode=parallel_mode, gradients_mean=True, device_num=degree)

    network = YOLOV3DarkNet53(is_training=True)
    # default is kaiming-normal
    default_recurisive_init(network)
    load_yolov3_params(args, network)

    network = YoloWithLossCell(network)
    args.logger.info('finish get network')

    config = ConfigYOLOV3DarkNet53()

    config.label_smooth = args.label_smooth
    config.label_smooth_factor = args.label_smooth_factor

    if args.training_shape:
        config.multi_scale = [conver_training_shape(args)]
    if args.resize_rate:
        config.resize_rate = args.resize_rate

    ds, data_size = create_yolo_dataset(image_dir=args.data_root, anno_path=args.annFile, is_training=True,
                                        batch_size=args.per_batch_size, max_epoch=args.max_epoch,
                                        device_num=args.group_size, rank=args.rank, config=config)
    args.logger.info('Finish loading dataset')

    args.steps_per_epoch = int(data_size / args.per_batch_size / args.group_size)

    if not args.ckpt_interval:
        args.ckpt_interval = args.steps_per_epoch

    lr = get_lr(args)

    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)
    is_gpu = context.get_context("device_target") == "GPU"
    if is_gpu:
        loss_scale_value = 1.0
        loss_scale = FixedLossScaleManager(loss_scale_value, drop_overflow_update=False)
        network = amp.build_train_network(network, optimizer=opt, loss_scale_manager=loss_scale,
                                          level="O2", keep_batchnorm_fp32=True)
        keep_loss_fp32(network)
    else:
        network = TrainingWrapper(network, opt)
        network.set_train()

    if args.rank_save_ckpt_flag:
        # checkpoint save
        ckpt_max_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
        ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                       keep_checkpoint_max=ckpt_max_num)
        save_ckpt_path = os.path.join(args.outputs_dir, 'ckpt_' + str(args.rank) + '/')
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=save_ckpt_path,
                                  prefix='{}'.format(args.rank))
        cb_params = _InternalCallbackParam()
        cb_params.train_network = network
        cb_params.epoch_num = ckpt_max_num
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        ckpt_cb.begin(run_context)

    old_progress = -1
    t_end = time.time()
    data_loader = ds.create_dict_iterator(output_numpy=True)

    for i, data in enumerate(data_loader):
        images = data["image"]
        input_shape = images.shape[2:4]
        args.logger.info('iter[{}], shape{}'.format(i, input_shape[0]))

        images = Tensor.from_numpy(images)

        batch_y_true_0 = Tensor.from_numpy(data['bbox1'])
        batch_y_true_1 = Tensor.from_numpy(data['bbox2'])
        batch_y_true_2 = Tensor.from_numpy(data['bbox3'])
        batch_gt_box0 = Tensor.from_numpy(data['gt_box1'])
        batch_gt_box1 = Tensor.from_numpy(data['gt_box2'])
        batch_gt_box2 = Tensor.from_numpy(data['gt_box3'])

        input_shape = Tensor(tuple(input_shape[::-1]), ms.float32)
        loss = network(images, batch_y_true_0, batch_y_true_1, batch_y_true_2, batch_gt_box0, batch_gt_box1,
                       batch_gt_box2, input_shape)
        loss_meter.update(loss.asnumpy())

        if args.rank_save_ckpt_flag:
            # ckpt progress
            cb_params.cur_step_num = i + 1  # current step number
            cb_params.batch_num = i + 2
            ckpt_cb.step_end(run_context)

        if i % args.log_interval == 0:
            time_used = time.time() - t_end
            epoch = int(i / args.steps_per_epoch)
            fps = args.per_batch_size * (i - old_progress) * args.group_size / time_used
            if args.rank == 0:
                args.logger.info(
                    'epoch[{}], iter[{}], {}, {:.2f} imgs/sec, lr:{}'.format(epoch, i, loss_meter, fps, lr[i]))
            t_end = time.time()
            loss_meter.reset()
            old_progress = i

        if (i + 1) % args.steps_per_epoch == 0 and args.rank_save_ckpt_flag:
            cb_params.cur_epoch_num += 1

        if args.need_profiler:
            if i == 10:
                profiler.analyse()
                break

    args.logger.info('==========end training===============')
示例#27
0
    # define dataset
    dataset = create_dataset(args_opt.dataset_path,
                             do_train=True,
                             batch_size=config.batch_size,
                             device_num=group_size,
                             rank=rank)
    step_size = dataset.get_dataset_size()

    # resume
    if args_opt.resume:
        ckpt = load_checkpoint(args_opt.resume)
        load_param_into_net(net, ckpt)

    # get learning rate
    loss_scale = FixedLossScaleManager(config.loss_scale,
                                       drop_overflow_update=False)
    lr = Tensor(
        get_lr(lr_init=config.lr_init,
               lr_end=config.lr_end,
               lr_max=config.lr_max,
               warmup_epochs=config.warmup_epochs,
               total_epochs=config.epoch_size,
               steps_per_epoch=step_size,
               lr_decay_mode=config.lr_decay_mode,
               global_step=config.finish_epoch * step_size))

    # define optimization and model
    if args_opt.device_target == "Ascend":
        opt = Momentum(net.trainable_params(), lr, config.momentum,
                       config.weight_decay, config.loss_scale)
        model = Model(net,
示例#28
0
def train(cloud_args=None):
    """training process"""
    args = parse_args(cloud_args)

    # init distributed
    if args.is_distributed:
        init()
        args.rank = get_rank()
        args.group_size = get_group_size()

    if args.is_dynamic_loss_scale == 1:
        args.loss_scale = 1  # for dynamic loss scale can not set loss scale in momentum opt

    # select for master rank save ckpt or all rank save, compatiable for model parallel
    args.rank_save_ckpt_flag = 0
    if args.is_save_on_master:
        if args.rank == 0:
            args.rank_save_ckpt_flag = 1
    else:
        args.rank_save_ckpt_flag = 1

    # logger
    args.outputs_dir = os.path.join(args.ckpt_path,
                                    datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    args.logger = get_logger(args.outputs_dir, args.rank)

    # dataloader
    de_dataset = classification_dataset(args.data_dir, args.image_size,
                                        args.per_batch_size, args.max_epoch,
                                        args.rank, args.group_size)
    de_dataset.map_model = 4  # !!!important
    args.steps_per_epoch = de_dataset.get_dataset_size()

    args.logger.save_args(args)

    # network
    args.logger.important_info('start create network')
    # get network and init
    network = get_network(args.backbone, args.num_classes)
    if network is None:
        raise NotImplementedError('not implement {}'.format(args.backbone))
    network.add_flags_recursive(fp16=True)
    # loss
    if not args.label_smooth:
        args.label_smooth_factor = 0.0
    criterion = CrossEntropy(smooth_factor=args.label_smooth_factor,
                             num_classes=args.num_classes)

    # load pretrain model
    if os.path.isfile(args.pretrained):
        param_dict = load_checkpoint(args.pretrained)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
            else:
                param_dict_new[key] = values
        load_param_into_net(network, param_dict_new)
        args.logger.info('load model {} success'.format(args.pretrained))

    # lr scheduler
    if args.lr_scheduler == 'exponential':
        lr = warmup_step_lr(args.lr,
                            args.lr_epochs,
                            args.steps_per_epoch,
                            args.warmup_epochs,
                            args.max_epoch,
                            gamma=args.lr_gamma,
                            )
    elif args.lr_scheduler == 'cosine_annealing':
        lr = warmup_cosine_annealing_lr(args.lr,
                                        args.steps_per_epoch,
                                        args.warmup_epochs,
                                        args.max_epoch,
                                        args.T_max,
                                        args.eta_min)
    else:
        raise NotImplementedError(args.lr_scheduler)

    # optimizer
    opt = Momentum(params=get_param_groups(network),
                   learning_rate=Tensor(lr),
                   momentum=args.momentum,
                   weight_decay=args.weight_decay,
                   loss_scale=args.loss_scale)


    criterion.add_flags_recursive(fp32=True)

    # package training process, adjust lr + forward + backward + optimizer
    train_net = BuildTrainNetwork(network, criterion)
    if args.is_distributed:
        parallel_mode = ParallelMode.DATA_PARALLEL
    else:
        parallel_mode = ParallelMode.STAND_ALONE
    if args.is_dynamic_loss_scale == 1:
        loss_scale_manager = DynamicLossScaleManager(init_loss_scale=65536, scale_factor=2, scale_window=2000)
    else:
        loss_scale_manager = FixedLossScaleManager(args.loss_scale, drop_overflow_update=False)

    # Model api changed since TR5_branch 2020/03/09
    context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=args.group_size,
                                      parameter_broadcast=True, mirror_mean=True)
    model = Model(train_net, optimizer=opt, metrics=None, loss_scale_manager=loss_scale_manager)

    # checkpoint save
    progress_cb = ProgressMonitor(args)
    callbacks = [progress_cb,]
    if args.rank_save_ckpt_flag:
        ckpt_max_num = args.max_epoch * args.steps_per_epoch // args.ckpt_interval
        ckpt_config = CheckpointConfig(save_checkpoint_steps=args.ckpt_interval,
                                       keep_checkpoint_max=ckpt_max_num)
        ckpt_cb = ModelCheckpoint(config=ckpt_config,
                                  directory=args.outputs_dir,
                                  prefix='{}'.format(args.rank))
        callbacks.append(ckpt_cb)

    model.train(args.max_epoch, de_dataset, callbacks=callbacks, dataset_sink_mode=True)
示例#29
0
def train_on_gpu():
    config = config_gpu_quant
    print("training args: {}".format(args_opt))
    print("training configure: {}".format(config))

    # define network
    network = mobilenetV2(num_classes=config.num_classes)
    # define loss
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(smooth_factor=config.label_smooth,
                                           num_classes=config.num_classes)
    else:
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    # define dataset
    epoch_size = config.epoch_size
    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=True,
                             config=config,
                             device_target=args_opt.device_target,
                             repeat_num=1,
                             batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()
    # resume
    if args_opt.pre_trained:
        param_dict = load_checkpoint(args_opt.pre_trained)
        load_nonquant_param_into_quant_net(network, param_dict)

    # convert fusion network to quantization aware network
    network = quant.convert_quant_network(network,
                                          bn_fold=True,
                                          per_channel=[True, False],
                                          symmetric=[True, False],
                                          freeze_bn=1000000,
                                          quant_delay=step_size * 2)

    # get learning rate
    loss_scale = FixedLossScaleManager(config.loss_scale,
                                       drop_overflow_update=False)
    lr = Tensor(
        get_lr(global_step=config.start_epoch * step_size,
               lr_init=0,
               lr_end=0,
               lr_max=config.lr,
               warmup_epochs=config.warmup_epochs,
               total_epochs=epoch_size + config.start_epoch,
               steps_per_epoch=step_size))

    # define optimization
    opt = nn.Momentum(
        filter(lambda x: x.requires_grad, network.get_parameters()), lr,
        config.momentum, config.weight_decay, config.loss_scale)
    # define model
    model = Model(network,
                  loss_fn=loss,
                  optimizer=opt,
                  loss_scale_manager=loss_scale)

    print("============== Starting Training ==============")
    callback = [Monitor(lr_init=lr.asnumpy())]
    ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(
        get_rank()) + "/"
    if config.save_checkpoint:
        config_ck = CheckpointConfig(
            save_checkpoint_steps=config.save_checkpoint_epochs * step_size,
            keep_checkpoint_max=config.keep_checkpoint_max)
        ckpt_cb = ModelCheckpoint(prefix="mobilenetV2",
                                  directory=ckpt_save_dir,
                                  config=config_ck)
        callback += [ckpt_cb]
    model.train(epoch_size, dataset, callbacks=callback)
    print("============== End Training ==============")
示例#30
0
def train():
    args = parse_args()

    # backend
    assert args.device_target == 'GPU'
    context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
    if args.distributed:
        init("nccl")
        args.rank = get_rank()
        args.group_size = get_group_size()
        context.set_auto_parallel_context(parallel_mode=context.ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True,
                                          device_num=args.group_size)
    # experiments directory
    args.train_dir = os.path.join(env_dir, args.train_dir, 'ckpt')
    if args.rank == 0:
        if os.path.exists(args.train_dir):
            shutil.rmtree(args.train_dir, ignore_errors=True)  # rm existing dir
        makedir_p(args.train_dir)
    args.data_file = os.path.join(env_dir, args.data_file)

    # dataset
    dataset = TransformSegDataset(data_file=args.data_file,
                                  batch_size=args.batch_size,
                                  crop_size=args.crop_size,
                                  min_scale=args.min_scale,
                                  max_scale=args.max_scale,
                                  ignore_label=args.ignore_label,
                                  num_classes=args.num_classes,
                                  shard_id=args.rank,
                                  shard_num=args.group_size)
    dataset = dataset.get_transformed_dataset(repeat=1)

    # network
    network = get_model_by_name(args.model, nclass=args.num_classes, phase='train')
    loss = SoftmaxCrossEntropyLoss(args.num_classes, ignore_label=args.ignore_label)
    loss.add_flags_recursive(fp32=True)
    train_net = BuildTrainNetwork(network, loss)

    # optimizer
    iters_per_epoch = dataset.get_dataset_size()
    total_train_steps = iters_per_epoch * args.epochs
    lr_iter = lr_scheduler(lr_type=args.lr_type,
                           base_lr=args.base_lr,
                           total_train_steps=total_train_steps,
                           lr_decay_step=args.lr_decay_step,
                           lr_decay_rate=args.lr_decay_rate)
    opt = nn.Momentum(params=train_net.trainable_params(),
                      learning_rate=lr_iter,
                      momentum=args.momentum,
                      weight_decay=args.wd,
                      loss_scale=args.loss_scale)

    # loss scale
    manager_loss_scale = FixedLossScaleManager(args.loss_scale, drop_overflow_update=False)
    model = Model(train_net, optimizer=opt, amp_level='O0', loss_scale_manager=manager_loss_scale)

    # callback for saving ckpts
    time_cb = TimeMonitor(data_size=iters_per_epoch)
    loss_cb = LossMonitor()
    cbs = [time_cb, loss_cb]

    if args.rank == 0:
        config_ck = CheckpointConfig(save_checkpoint_steps=args.save_steps,
                                     keep_checkpoint_max=args.keep_checkpoint_max)
        ckpoint_cb = ModelCheckpoint(prefix=args.model, directory=args.train_dir, config=config_ck)
        cbs.append(ckpoint_cb)

    model.train(args.epochs, dataset, callbacks=cbs,
                dataset_sink_mode=(args.device_target != "CPU"))