Exemplo n.º 1
0
def test(args, net, loss, opt, dataset, checkpoints):
    """Evaluate each checkpoint and save the accuracies to a text file.

    One ``KungFuModel`` is built around ``net`` and handed to
    ``test_checkpoint`` for every entry of ``checkpoints``.  Each result is
    printed as soon as it is available; afterwards all of them are written,
    in the order the checkpoints were given, as ``"<logical step> <accuracy>"``
    lines to ``plot/lbs-<logical_batch_size>+dbs-<device_batch_size>.txt``.

    Args:
        args: Run configuration; ``logical_batch_size`` and
            ``device_batch_size`` are read to name the output file.
        net: Network passed to ``KungFuModel`` and ``test_checkpoint``.
        loss: Loss function passed to ``KungFuModel``.
        opt: Optimizer passed to ``KungFuModel``.
        dataset: Evaluation dataset passed to ``test_checkpoint``.
        checkpoints: Iterable of checkpoint names to evaluate.
    """
    import os

    model = KungFuModel(
        net,
        loss_fn=loss,
        optimizer=opt,
        # loss_scale_manager=loss_scale,
        metrics={'acc'},
        amp_level="O2",
        keep_batchnorm_fp32=False,
    )

    results = []
    for ckpt_name in checkpoints:
        logical_step = parse_logical_step(ckpt_name)
        acc = test_checkpoint(net, model, dataset, ckpt_name)
        results.append((logical_step, acc))
        print("%s, accuracy: %s" % (ckpt_name, acc))

    filename = 'plot/lbs-%d+dbs-%d.txt' % (args.logical_batch_size,
                                           args.device_batch_size)

    # Make sure the output directory exists: without this a missing
    # 'plot/' directory raises FileNotFoundError only after every
    # checkpoint has been evaluated, throwing the results away.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    with open(filename, 'w') as f:
        for step, acc in results:
            f.write('%d %s' % (step, acc) + '\n')

    print('saved to %s' % (filename))
Exemplo n.º 2
0
def test(args, net, loss, opt, dataset, checkpoints):
    """Evaluate each checkpoint and save its top-1 accuracy to a text file.

    One ``KungFuModel`` reporting top-1 and top-5 accuracy is built around
    ``net`` and handed to ``test_checkpoint`` for every entry of
    ``checkpoints``.  The first metric (``'top_1_accuracy'``) is printed per
    checkpoint and then written, ordered by ``(epoch, step)``, as
    ``"<epoch> <step> <accuracy>"`` lines to the file named by
    ``get_eval_result_filename(args)``.

    Args:
        args: Run configuration, forwarded to ``get_eval_result_filename``.
        net: Network passed to ``KungFuModel`` and ``test_checkpoint``.
        loss: Loss function passed to ``KungFuModel``.
        opt: Optimizer passed to ``KungFuModel``.
        dataset: Evaluation dataset passed to ``test_checkpoint``.
        checkpoints: Iterable of checkpoint names to evaluate.
    """
    metrics = [
        'top_1_accuracy',
        'top_5_accuracy',
    ]
    # The first metric is the one that gets printed and saved.
    primary = metrics[0]

    model = KungFuModel(
        net,
        loss_fn=loss,
        optimizer=opt,
        metrics=set(metrics),
        amp_level="O2",

        # NOTE: keep_batchnorm_fp32=False is left disabled below; it is
        # apparently what triggered the following GPU kernel-selection error:
        # [ERROR] DEVICE(8546,python3.7):2021-02-27-01:12:19.225.728 [mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc:118] SelectAkgKernel] Not find op[BatchNorm] in akg
        # [ERROR] DEVICE(8546,python3.7):2021-02-27-01:12:19.225.791 [mindspore/ccsrc/runtime/device/gpu/kernel_info_setter.cc:322] PrintUnsupportedTypeException] Select GPU kernel op[BatchNorm] fail! Incompatible data type!
        # The supported data types are in[float32 float32 float32 float32 float32], out[float32 float32 float32 float32 float32]; in[float16 float32 float32 float32 float32], out[float16 float32 float32 float32 float32]; , but get in [float16 float16 float16 float16 float16 ] out [float16 float16 float16 float16 float16 ]
        # keep_batchnorm_fp32=False,
    )

    results = []
    for ckpt_name in checkpoints:
        epoch, step = parse_logical_step(ckpt_name)
        result = test_checkpoint(net, model, dataset, ckpt_name)
        results.append((epoch, step, result))
        print("%s, %s: %s" % (ckpt_name, primary, result[primary]))

    filename = get_eval_result_filename(args)

    # Sort on (epoch, step) only.  Sorting the raw tuples would fall back to
    # comparing the ``result`` mappings whenever two checkpoints share the
    # same (epoch, step), and that comparison raises TypeError.
    ordered = sorted(results, key=lambda entry: entry[:2])

    with open(filename, 'w') as f:
        for epoch, step, result in ordered:
            f.write('%d %d %s' % (epoch, step, result[primary]) + '\n')

    print('%d points saved to %s' % (len(results), filename))
Exemplo n.º 3
0
def train(args, net, loss, opt, dataset):
    """Wrap ``net`` in a ``KungFuModel`` and train it on ``dataset``.

    Args:
        args: Run configuration; ``epochs`` gives the epoch count, and the
            whole object is forwarded to ``build_callbacks`` and to
            ``model.train`` as its first positional argument.
        net: Network to train.
        loss: Loss function passed to ``KungFuModel``.
        opt: Optimizer passed to ``KungFuModel``.
        dataset: Training dataset.
    """
    # Model configuration; a loss-scale manager is deliberately not set here
    # (the original left ``loss_scale_manager=loss_scale`` commented out).
    model_options = dict(
        loss_fn=loss,
        optimizer=opt,
        metrics={'acc'},
        amp_level="O2",
        keep_batchnorm_fp32=False,
    )
    model = KungFuModel(net, **model_options)

    num_epochs = args.epochs
    callbacks = build_callbacks(args)

    model.train(
        args,
        epoch=num_epochs,
        train_dataset=dataset,
        callbacks=callbacks,
        dataset_sink_mode=False,
    )