示例#1
0
def test_init_xavier_uniform():
    """Check that XavierUniform samples stay inside the expected bounds.

    Tensors are created from the ``XavierUniform`` class (with and without an
    explicit gain) and from the ``'xavier_uniform'`` alias, for both 2-D and
    4-D shapes. For each one the bound ``gain * sqrt(2 / (n_in + n_out)) *
    sqrt(3)`` is recomputed from the tensor shape and handed to
    ``_check_uniform``.
    """
    gain = 1.2
    tensor1 = init.initializer(
        init.XavierUniform(gain=gain), [20, 22], ms.float32)
    tensor2 = init.initializer(
        init.XavierUniform(), [20, 22], ms.float32)
    tensor3 = init.initializer(
        init.XavierUniform(gain=gain), [20, 22, 5, 5], ms.float32)
    tensor4 = init.initializer(
        init.XavierUniform(), [20, 22, 5, 5], ms.float32)
    tensor5 = init.initializer('xavier_uniform', [20, 22, 5, 5], ms.float32)
    tensor6 = init.initializer('xavier_uniform', [20, 22], ms.float32)

    # Maps each tensor to the gain it was built with; None means "default".
    expected_gain = {
        tensor1: gain,
        tensor2: None,
        tensor3: gain,
        tensor4: None,
        tensor5: None,
        tensor6: None,
    }

    for tensor, gain_value in expected_gain.items():
        effective_gain = 1 if gain_value is None else gain_value
        dims = tensor.asnumpy().shape
        # Product of every dimension past the first two; 1 for 2-D shapes.
        trailing = reduce(lambda acc, dim: acc * dim, dims[2:], 1)
        fan_in = dims[1] * trailing
        fan_out = dims[0] * trailing
        std = effective_gain * math.sqrt(2 / (fan_in + fan_out))
        limit = std * math.sqrt(3)
        assert _check_uniform(tensor, -limit, limit)
示例#2
0
def init_weight(nnet):
    """Re-initialize the weights of every Conv2d and Dense cell in ``nnet``.

    Each matching cell gets a new XavierUniform-initialized weight with the
    same shape and dtype as its current weight. Other cells are left alone.
    """
    xavier_cells = (nn.Conv2d, nn.Dense)
    for _, cell in nnet.cells_and_names():
        if not isinstance(cell, xavier_cells):
            continue
        weight = cell.weight
        fresh = weight_init.initializer(weight_init.XavierUniform(),
                                        weight.shape, weight.dtype)
        weight.set_data(fresh)
示例#3
0
def init_weights(net, init_type='normal', init_gain=0.02):
    """
    Initialize network weights in place.

    Conv2d / Conv2dTranspose weights are reset according to ``init_type``;
    BatchNorm2d cells get gamma set to ones and beta set to zeros.

    Parameters:
        net (Cell): Network to be initialized.
        init_type (str): Initialization method: normal | xavier | constant.
        init_gain (float): Value passed to Normal / XavierUniform; not used
            by 'constant', which always fills with 0.001.

    Raises:
        NotImplementedError: If a convolution cell is encountered and
            ``init_type`` is not one of the supported names.
    """
    conv_cells = (nn.Conv2d, nn.Conv2dTranspose)
    for _, cell in net.cells_and_names():
        if isinstance(cell, conv_cells):
            # Choose the initializer spec first, then apply it in one place.
            if init_type == 'normal':
                spec = init.Normal(init_gain)
            elif init_type == 'xavier':
                spec = init.XavierUniform(init_gain)
            elif init_type == 'constant':
                spec = 0.001
            else:
                raise NotImplementedError(
                    'initialization method [%s] is not implemented' %
                    init_type)
            cell.weight.set_data(init.initializer(spec, cell.weight.shape))
            continue
        if isinstance(cell, nn.BatchNorm2d):
            gamma, beta = cell.gamma, cell.beta
            gamma.set_data(init.initializer('ones', gamma.shape))
            beta.set_data(init.initializer('zeros', beta.shape))
 def __init__(self):
     """Create one Parameter per initializer under test.

     Every parameter uses ``parameter_shape`` (defined outside this method)
     and is named after the initializer that produced it.
     """
     super(ParameterNet, self).__init__()

     def build(spec, label):
         # Wrap the initializer output in a named Parameter.
         return Parameter(init.initializer(spec, parameter_shape), name=label)

     # Initializers selected by string alias.
     self.para_xavier_uniform = build('xavier_uniform', "xavier_uniform")
     self.para_he_uniform = build('he_uniform', "he_uniform")
     # The same initializers selected by class instance.
     self.para_xavier_uniform2 = build(init.XavierUniform(), "xavier_uniform2")
     self.para_he_uniform2 = build(init.HeUniform(), "he_uniform2")
     # Remaining class-based initializers.
     self.para_truncated_normal = build(init.TruncatedNormal(), "truncated_normal")
     self.para_normal = build(init.Normal(), "normal")
     self.para_uniform = build(init.Uniform(), "uniform")
示例#5
0
def train():
    """Run the ResNet GPU benchmark training job described by ``args_opt``.

    Steps, in order: read the command-line options, configure the execution
    context (graph or pynative mode, optional data-parallel setup), build the
    dataset and network, re-initialize Conv2d / Dense weights, build the
    learning-rate schedule, loss, optimizer and ``Model``, register the
    timing and optional checkpoint callbacks, and finally start training.

    All configuration is read from the module-level ``args_opt``; the function
    takes no arguments and returns nothing.
    """
    # Local import: only needed to build the per-rank checkpoint directory.
    import os

    # set args
    dev = "GPU"
    epoch_size = int(args_opt.epoch_size)
    total_batch = int(args_opt.batch_size)
    print_per_steps = int(args_opt.print_per_steps)
    compute_type = str(args_opt.dtype).lower()
    ckpt_save_dir = str(args_opt.ckpt_path)
    # NOTE(review): bool() of any non-empty string is True -- confirm the
    # argument parser already delivers a real boolean here.
    save_ckpt = bool(args_opt.save_ckpt)
    device_num = 1

    # init context
    if args_opt.mode == "GRAPH":
        mode = context.GRAPH_MODE
        all_reduce_fusion_config = [85, 160]
    else:
        mode = context.PYNATIVE_MODE
        all_reduce_fusion_config = [30, 90, 160]
    context.set_context(mode=mode, device_target=dev, save_graphs=False)
    if args_opt.run_distribute:
        init()
        device_num = get_group_size()
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True,
            all_reduce_fusion_config=all_reduce_fusion_config)
        # Join with a path separator so a ckpt_path without a trailing "/"
        # no longer produces "<path>ckpt_<rank>/". The trailing "/" is kept
        # for compatibility with the previous value.
        ckpt_save_dir = os.path.join(ckpt_save_dir,
                                     "ckpt_" + str(get_rank())) + "/"

    # create dataset
    dataset = create_dataset(dataset_path=args_opt.dataset_path,
                             do_train=True,
                             repeat_num=1,
                             batch_size=total_batch,
                             target=dev,
                             dtype=compute_type,
                             device_num=device_num)
    step_size = dataset.get_dataset_size()
    if not 1 <= print_per_steps <= step_size:
        print("Arg: print_per_steps should lessequal to dataset_size ",
              step_size)
        print("Change to default: 20")
        print_per_steps = 20

    # define net
    net = resnet(class_num=1001, dtype=compute_type)

    # init weight: XavierUniform for convolutions, TruncatedNormal for Dense
    for _, cell in net.cells_and_names():
        if isinstance(cell, nn.Conv2d):
            cell.weight.set_data(
                weight_init.initializer(weight_init.XavierUniform(),
                                        cell.weight.shape, cell.weight.dtype))
        elif isinstance(cell, nn.Dense):
            cell.weight.set_data(
                weight_init.initializer(weight_init.TruncatedNormal(),
                                        cell.weight.shape, cell.weight.dtype))

    # init lr
    lr = get_liner_lr(lr_init=0,
                      lr_end=0,
                      lr_max=0.8,
                      warmup_epochs=0,
                      total_epochs=epoch_size,
                      steps_per_epoch=step_size)
    lr = Tensor(lr)

    # define loss
    loss = CrossEntropySmooth(sparse=True,
                              reduction='mean',
                              smooth_factor=0.1,
                              num_classes=1001)

    # define opt and model; exactly one optimizer/Model pair is built so the
    # fp16 path does not construct and then discard a full-precision pair.
    momentum = 0.9
    weight_decay = 1e-4
    if compute_type == "fp16":
        # Mixed precision: fixed loss scale shared by manager and optimizer.
        loss_scale_value = 1024
        loss_scale = FixedLossScaleManager(loss_scale_value,
                                           drop_overflow_update=False)
        if mode == context.PYNATIVE_MODE:
            optimizer_cls = MomentumWeightDecay
        else:
            optimizer_cls = Momentum
        opt = optimizer_cls(
            filter(lambda x: x.requires_grad, net.get_parameters()), lr,
            momentum, weight_decay, loss_scale_value)
        model = Model(net,
                      loss_fn=loss,
                      optimizer=opt,
                      loss_scale_manager=loss_scale,
                      metrics={'acc'},
                      amp_level="O2",
                      keep_batchnorm_fp32=False)
    else:
        opt = Momentum(
            filter(lambda x: x.requires_grad, net.get_parameters()), lr,
            momentum, weight_decay)
        model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})

    # define callbacks
    if mode == context.PYNATIVE_MODE:
        print_per_steps = 1
    time_cb = MyTimeMonitor(total_batch, print_per_steps, step_size, mode)
    cb = [time_cb]
    if save_ckpt:
        config_ck = CheckpointConfig(save_checkpoint_steps=5 * step_size,
                                     keep_checkpoint_max=5)
        ckpt_cb = ModelCheckpoint(prefix="resnet_benchmark",
                                  directory=ckpt_save_dir,
                                  config=config_ck)
        cb += [ckpt_cb]

    # train model
    print("========START RESNET50 GPU BENCHMARK========")
    if mode == context.GRAPH_MODE:
        # Each "epoch" passed to train() covers print_per_steps sink steps,
        # so the epoch count is rescaled to keep the total step count.
        model.train(int(epoch_size * step_size / print_per_steps),
                    dataset,
                    callbacks=cb,
                    sink_size=print_per_steps)
    else:
        model.train(epoch_size, dataset, callbacks=cb)
示例#6
0
                             target=dev,
                             dtype=compute_type)
    step_size = dataset.get_dataset_size()
    if (print_per_steps > step_size or print_per_steps < 1):
        print("Arg: print_per_steps should lessequal to dataset_size ",
              step_size)
        print("Change to default: 20")
        print_per_steps = 20
    # define net
    net = resnet(class_num=1001, dtype=compute_type)

    # init weight
    for _, cell in net.cells_and_names():
        if isinstance(cell, nn.Conv2d):
            cell.weight.set_data(
                weight_init.initializer(weight_init.XavierUniform(),
                                        cell.weight.shape, cell.weight.dtype))
        if isinstance(cell, nn.Dense):
            cell.weight.set_data(
                weight_init.initializer(weight_init.TruncatedNormal(),
                                        cell.weight.shape, cell.weight.dtype))

    # init lr
    lr = get_liner_lr(lr_init=0,
                      lr_end=0,
                      lr_max=0.8,
                      warmup_epochs=0,
                      total_epochs=epoch_size,
                      steps_per_epoch=step_size)
    lr = Tensor(lr)
示例#7
0
def test_init_xavier_uniform_error():
    """Expect ValueError when XavierUniform is applied to the shape ``[6]``."""
    one_dim_shape = [6]
    with py.raises(ValueError):
        init.initializer(init.XavierUniform(), one_dim_shape, ms.float32)
示例#8
0
                             batch_size=config.batch_size, target=target)
    step_size = dataset.get_dataset_size()

    # define net
    net = resnet(class_num=config.class_num)
    if args_opt.parameter_server:
        net.set_param_ps()

    # init weight
    if args_opt.pre_trained:
        param_dict = load_checkpoint(args_opt.pre_trained)
        load_param_into_net(net, param_dict)
    else:
        for _, cell in net.cells_and_names():
            if isinstance(cell, nn.Conv2d):
                cell.weight.default_input = weight_init.initializer(weight_init.XavierUniform(),
                                                                    cell.weight.shape,
                                                                    cell.weight.dtype)
            if isinstance(cell, nn.Dense):
                cell.weight.default_input = weight_init.initializer(weight_init.TruncatedNormal(),
                                                                    cell.weight.shape,
                                                                    cell.weight.dtype)

    # init lr
    if args_opt.net == "resnet50" or args_opt.net == "se-resnet50":
        lr = get_lr(lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max,
                    warmup_epochs=config.warmup_epochs, total_epochs=config.epoch_size, steps_per_epoch=step_size,
                    lr_decay_mode=config.lr_decay_mode)
    else:
        lr = warmup_cosine_annealing_lr(config.lr, step_size, config.warmup_epochs, config.epoch_size,
                                        config.pretrain_epoch_size * step_size)
示例#9
0
文件: train.py 项目: Asleda/mindspore
                                 repeat_num=1,
                                 batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()

    # define net
    net = resnet(class_num=config.class_num)

    # init weight
    if args_opt.pre_trained:
        param_dict = load_checkpoint(args_opt.pre_trained)
        load_param_into_net(net, param_dict)
    else:
        for _, cell in net.cells_and_names():
            if isinstance(cell, nn.Conv2d):
                cell.weight.default_input = weight_init.initializer(
                    weight_init.XavierUniform(),
                    cell.weight.default_input.shape,
                    cell.weight.default_input.dtype).to_tensor()
            if isinstance(cell, nn.Dense):
                cell.weight.default_input = weight_init.initializer(
                    weight_init.TruncatedNormal(),
                    cell.weight.default_input.shape,
                    cell.weight.default_input.dtype).to_tensor()

    # init lr
    if args_opt.net == "resnet50":
        if args_opt.dataset == "cifar10":
            lr = get_lr(lr_init=config.lr_init,
                        lr_end=config.lr_end,
                        lr_max=config.lr_max,
                        warmup_epochs=config.warmup_epochs,
示例#10
0
    if not args_opt.do_eval and args_opt.run_distribute:
        context.set_auto_parallel_context(
            device_num=args_opt.device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            mirror_mean=True,
            parameter_broadcast=True)
        auto_parallel_context().set_all_reduce_fusion_split_indices([180, 313])
        init()

    epoch_size = config.epoch_size
    net = resnet101(class_num=config.class_num)
    # weight init
    for _, cell in net.cells_and_names():
        if isinstance(cell, nn.Conv2d):
            cell.weight.default_input = weight_init.initializer(
                weight_init.XavierUniform(), cell.weight.default_input.shape,
                cell.weight.default_input.dtype).to_tensor()
        if isinstance(cell, nn.Dense):
            cell.weight.default_input = weight_init.initializer(
                weight_init.TruncatedNormal(), cell.weight.default_input.shape,
                cell.weight.default_input.dtype).to_tensor()
    if not config.label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropy(smooth_factor=config.label_smooth_factor,
                        num_classes=config.class_num)
    if args_opt.do_train:
        dataset = create_dataset(dataset_path=args_opt.dataset_path,
                                 do_train=True,
                                 repeat_num=epoch_size,
                                 batch_size=config.batch_size)
        step_size = dataset.get_dataset_size()
示例#11
0
                             batch_size=config.batch_size,
                             target=target)
    step_size = dataset.get_dataset_size()

    # define net
    net = resnet(class_num=config.class_num)

    # init weight
    if args_opt.pre_trained:
        param_dict = load_checkpoint(args_opt.pre_trained)
        load_param_into_net(net, param_dict)
    else:
        for _, cell in net.cells_and_names():
            if isinstance(cell, nn.Conv2d):
                cell.weight.default_input = weight_init.initializer(
                    weight_init.XavierUniform(), cell.weight.shape,
                    cell.weight.dtype)
            if isinstance(cell, nn.Dense):
                cell.weight.default_input = weight_init.initializer(
                    weight_init.TruncatedNormal(), cell.weight.shape,
                    cell.weight.dtype)

    # init lr
    if args_opt.net == "resnet50":
        if args_opt.dataset == "cifar10":
            lr = get_lr(lr_init=config.lr_init,
                        lr_end=config.lr_end,
                        lr_max=config.lr_max,
                        warmup_epochs=config.warmup_epochs,
                        total_epochs=config.epoch_size,
                        steps_per_epoch=step_size,
示例#12
0
                             batch_size=config.batch_size, target=target)
    step_size = dataset.get_dataset_size()

    # define net
    net = mobilenet(class_num=config.class_num)
    if args_opt.parameter_server:
        net.set_param_ps()

    # init weight
    if args_opt.pre_trained:
        param_dict = load_checkpoint(args_opt.pre_trained)
        load_param_into_net(net, param_dict)
    else:
        for _, cell in net.cells_and_names():
            if isinstance(cell, nn.Conv2d):
                cell.weight.set_data(weight_init.initializer(weight_init.XavierUniform(),
                                                             cell.weight.shape,
                                                             cell.weight.dtype))
            if isinstance(cell, nn.Dense):
                cell.weight.set_data(weight_init.initializer(weight_init.TruncatedNormal(),
                                                             cell.weight.shape,
                                                             cell.weight.dtype))

    # init lr
    lr = get_lr(lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max,
                warmup_epochs=config.warmup_epochs, total_epochs=config.epoch_size, steps_per_epoch=step_size,
                lr_decay_mode=config.lr_decay_mode)
    lr = Tensor(lr)

    # define opt
    decayed_params = []