示例#1
0
def eval_alexnet():
    """Evaluate an AlexNet checkpoint on the dataset selected by ``config``.

    All settings are read from the module-level ``config`` object and the
    checkpoint file is read from the module-level ``load_path``. The metrics
    returned by ``Model.eval`` are printed; nothing is returned.

    Raises:
        ValueError: If ``config.dataset_name`` is neither ``'cifar10'`` nor
            ``'imagenet'``, or if the evaluation dataset has size 0.
    """
    print("============== Starting Testing ==============")

    device_num = get_device_num()
    if device_num > 1:
        # Honor the configured backend: a fixed target here would contradict
        # the Ascend/GPU dispatch right below.
        context.set_context(mode=context.GRAPH_MODE,
                            device_target=config.device_target,
                            save_graphs=False)
        if config.device_target == "Ascend":
            context.set_context(device_id=get_device_id())
            init()
        elif config.device_target == "GPU":
            init()

    dataset_name = config.dataset_name
    if dataset_name not in ('cifar10', 'imagenet'):
        raise ValueError("Unsupported dataset.")

    # Network and loss are built the same way for both datasets.
    network = AlexNet(config.num_classes, phase='test')
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")

    if dataset_name == 'cifar10':
        opt = nn.Momentum(network.trainable_params(), config.learning_rate,
                          config.momentum)
        ds_eval = create_dataset_cifar10(config.data_path,
                                         config.batch_size,
                                         status="test",
                                         target=config.device_target)
        metrics = {"Accuracy": Accuracy()}
    else:
        # The ImageNet path builds the model without an optimizer.
        opt = None
        ds_eval = create_dataset_imagenet(config.data_path,
                                          config.batch_size,
                                          training=False)
        metrics = {'top_1_accuracy', 'top_5_accuracy'}

    # Restore the trained weights and switch the network to inference mode.
    param_dict = load_checkpoint(load_path)
    print("load checkpoint from [{}].".format(load_path))
    load_param_into_net(network, param_dict)
    network.set_train(False)
    model = Model(network, loss_fn=loss, optimizer=opt, metrics=metrics)

    if ds_eval.get_dataset_size() == 0:
        raise ValueError(
            "Please check dataset size > 0 and batch_size <= dataset size")

    result = model.eval(ds_eval, dataset_sink_mode=config.dataset_sink_mode)
    print("result : {}".format(result))
示例#2
0
    if device_target == "Ascend":
        context.set_context(device_id=args_opt.device_id)

        if device_num > 1:
            context.reset_auto_parallel_context()
            context.set_auto_parallel_context(
                device_num=device_num,
                parallel_mode=ParallelMode.DATA_PARALLEL,
                gradients_mean=True)
            init()
            rank = get_rank()
    else:
        raise ValueError("Unsupported platform.")

    if args_opt.dataset_name == "imagenet":
        dataset = create_dataset_imagenet(cfg.data_path, 1)
    else:
        raise ValueError("Unsupport dataset.")

    batch_num = dataset.get_dataset_size()

    net = TinyDarkNet(num_classes=cfg.num_classes)
    # Continue training if set pre_trained to be True
    if cfg.pre_trained:
        param_dict = load_checkpoint(cfg.checkpoint_path)
        load_param_into_net(net, param_dict)

    loss_scale_manager = None
    if args_opt.dataset_name == 'imagenet':
        lr = lr_steps_imagenet(cfg, batch_num)
示例#3
0
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
        opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
        ds_eval = create_dataset_cifar10(args.data_path, cfg.batch_size, status="test", target=args.device_target)

        param_dict = load_checkpoint(args.ckpt_path)
        print("load checkpoint from [{}].".format(args.ckpt_path))
        load_param_into_net(network, param_dict)
        network.set_train(False)

        model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()})

    elif args.dataset_name == 'imagenet':
        cfg = alexnet_imagenet_cfg
        network = AlexNet(cfg.num_classes)
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
        ds_eval = create_dataset_imagenet(args.data_path, cfg.batch_size, training=False)

        param_dict = load_checkpoint(args.ckpt_path)
        print("load checkpoint from [{}].".format(args.ckpt_path))
        load_param_into_net(network, param_dict)
        network.set_train(False)

        model = Model(network, loss_fn=loss, metrics={'top_1_accuracy', 'top_5_accuracy'})

    else:
        raise ValueError("Unsupport dataset.")

    if ds_eval.get_dataset_size() == 0:
        raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")

    result = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode)
示例#4
0
文件: eval.py 项目: yrpang/mindspore
# Which dataset to evaluate on; only the two listed names are accepted.
parser.add_argument(
    '--dataset_name', type=str, default='imagenet',
    choices=['imagenet', 'cifar10'], help='dataset name.')
# Path of the checkpoint file; defaults to None when not given.
parser.add_argument(
    '--checkpoint_path', type=str, default=None,
    help='Checkpoint file path')
# Parse the command line once at module level.
args_opt = parser.parse_args()

if __name__ == '__main__':

    if args_opt.dataset_name == "imagenet":
        cfg = imagenet_cfg
        dataset = create_dataset_imagenet(cfg.val_data_path, 1, False)
        if not cfg.use_label_smooth:
            cfg.label_smooth_factor = 0.0
        loss = CrossEntropySmooth(sparse=True,
                                  reduction="mean",
                                  smooth_factor=cfg.label_smooth_factor,
                                  num_classes=cfg.num_classes)
        net = TinyDarkNet(num_classes=cfg.num_classes)
        model = Model(net,
                      loss_fn=loss,
                      metrics={'top_1_accuracy', 'top_5_accuracy'})

    else:
        raise ValueError("dataset is not support.")

    device_target = cfg.device_target
示例#5
0
        if device_num > 1:
            init()
            context.reset_auto_parallel_context()
            context.set_auto_parallel_context(
                device_num=device_num,
                parallel_mode=ParallelMode.DATA_PARALLEL,
                gradients_mean=True)
    else:
        raise ValueError("Unsupported platform.")

    if args.dataset_name == "cifar10":
        ds_train = create_dataset_cifar10(args.data_path,
                                          cfg.batch_size,
                                          target=args.device_target)
    elif args.dataset_name == "imagenet":
        ds_train = create_dataset_imagenet(args.data_path, cfg.batch_size)
    else:
        raise ValueError("Unsupport dataset.")

    if ds_train.get_dataset_size() == 0:
        raise ValueError(
            "Please check dataset size > 0 and batch_size <= dataset size")

    network = AlexNet(cfg.num_classes, phase='train')

    loss_scale_manager = None
    metrics = None
    step_per_epoch = ds_train.get_dataset_size(
    ) if args.sink_size == -1 else args.sink_size
    if args.dataset_name == 'cifar10':
        loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
示例#6
0
def train_alexnet():
    """Train AlexNet on the dataset and backend selected by ``config``.

    Prints the configuration and device/job identifiers, sets up the
    execution context (data-parallel when more than one device is reported),
    builds the dataset, loss, learning-rate schedule and optimizer, then
    runs ``Model.train`` with time, checkpoint and loss callbacks.

    Raises:
        ValueError: If ``config.dataset_name`` is not ``"cifar10"`` or
            ``"imagenet"``, if the training dataset has size 0, or if
            ``config.device_target`` is neither ``"Ascend"`` nor ``"GPU"``.
    """
    print(config)
    print('device id:', get_device_id())
    print('device num:', get_device_num())
    print('rank id:', get_rank_id())
    print('job id:', get_job_id())

    device_target = config.device_target
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=device_target)
    context.set_context(save_graphs=False)

    device_num = get_device_num()
    distributed = device_num > 1
    dataset_name = config.dataset_name

    # Reject unknown datasets up front.
    if dataset_name not in ("cifar10", "imagenet"):
        raise ValueError("Unsupported dataset.")

    # Multi-device CIFAR-10 runs scale the learning rate by the device count
    # and double the number of epochs.
    if dataset_name == "cifar10" and distributed:
        config.learning_rate = config.learning_rate * device_num
        config.epoch_size = config.epoch_size * 2

    if not distributed:
        context.set_context(device_id=get_device_id())
    else:
        context.reset_auto_parallel_context()
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True)
        if device_target == "Ascend":
            context.set_context(device_id=get_device_id())
        if device_target in ("Ascend", "GPU"):
            init()

    if dataset_name == "cifar10":
        ds_train = create_dataset_cifar10(config.data_path,
                                          config.batch_size,
                                          target=config.device_target)
    else:
        ds_train = create_dataset_imagenet(config.data_path, config.batch_size)

    if ds_train.get_dataset_size() == 0:
        raise ValueError(
            "Please check dataset size > 0 and batch_size <= dataset size")

    network = AlexNet(config.num_classes, phase='train')

    loss_scale_manager = None
    metrics = None

    # A sink_size of -1 means "use the full dataset size" as steps per epoch.
    if config.sink_size == -1:
        step_per_epoch = ds_train.get_dataset_size()
    else:
        step_per_epoch = config.sink_size

    # Both datasets use the same loss.
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")

    if dataset_name == 'cifar10':
        schedule = get_lr_cifar10(0, config.learning_rate, config.epoch_size,
                                  step_per_epoch)
        lr = Tensor(schedule)
        opt = nn.Momentum(network.trainable_params(), lr, config.momentum)
        metrics = {"Accuracy": Accuracy()}
    else:
        schedule = get_lr_imagenet(config.learning_rate, config.epoch_size,
                                   step_per_epoch)
        lr = Tensor(schedule)
        opt = nn.Momentum(params=get_param_groups(network),
                          learning_rate=lr,
                          momentum=config.momentum,
                          weight_decay=config.weight_decay,
                          loss_scale=config.loss_scale)

        from mindspore.train.loss_scale_manager import DynamicLossScaleManager, FixedLossScaleManager
        if config.is_dynamic_loss_scale == 1:
            loss_scale_manager = DynamicLossScaleManager(init_loss_scale=65536,
                                                         scale_factor=2,
                                                         scale_window=2000)
        else:
            loss_scale_manager = FixedLossScaleManager(
                config.loss_scale, drop_overflow_update=False)

    # Only the Ascend backend gets the extra mixed-precision arguments.
    if device_target == "Ascend":
        platform_kwargs = {"amp_level": "O2", "keep_batchnorm_fp32": False}
    elif device_target == "GPU":
        platform_kwargs = {}
    else:
        raise ValueError("Unsupported platform.")
    model = Model(network,
                  loss_fn=loss,
                  optimizer=opt,
                  metrics=metrics,
                  **platform_kwargs,
                  loss_scale_manager=loss_scale_manager)

    # In multi-device runs each rank writes to its own "<path>_<rank>" directory.
    ckpt_save_dir = (os.path.join(config.checkpoint_path + "_" +
                                  str(get_rank()))
                     if distributed else config.checkpoint_path)

    time_cb = TimeMonitor(data_size=step_per_epoch)
    config_ck = CheckpointConfig(
        save_checkpoint_steps=config.save_checkpoint_steps,
        keep_checkpoint_max=config.keep_checkpoint_max)
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_alexnet",
                                 directory=ckpt_save_dir,
                                 config=config_ck)

    print("============== Starting Training ==============")
    callbacks = [time_cb, ckpoint_cb, LossMonitor()]
    model.train(config.epoch_size,
                ds_train,
                callbacks=callbacks,
                dataset_sink_mode=config.dataset_sink_mode,
                sink_size=config.sink_size)