def train():
    # Path to the dataset folder
    PATH = 'coco'
    TEST_PATH = "test.jpg"

    # Batch size
    BATCH_SIZE = 128
    # Input image size
    IMG_W, IMG_H = 192, 256
    # Output heatmap size
    HEATMAP_W, HEATMAP_H = 48, 64

    # Learning rate, decay factor, total epochs, and decay schedule
    lr = 0.001
    lr_factor = 0.1
    EPOCH = 140
    lr_steps = [90, 120, 140]

    # Data-loading workers: larger values read data faster but use more memory
    num_workers = 2
    # Train on the first GPU
    context = mx.gpu(0)

    # L2 loss for keypoint heatmap regression
    L = gluon.loss.L2Loss()
    # Build the network (MobileNetV2 backbone)
    net = posenet.mobilenetv2_05(context, IMG_W, IMG_H)
    # Build the evaluation metric
    metric = HeatmapAccuracy()
    # Load the training and validation data
    train_data, val_data = data_loader(PATH, BATCH_SIZE, IMG_W, IMG_H, HEATMAP_W, HEATMAP_H, num_workers)
    # Use the Adam optimizer
    trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': lr})
    # Training loop: decay the learning rate at the scheduled epochs
    lr_counter = 0
    for epoch in range(EPOCH):
        metric.reset()
        if lr_counter < len(lr_steps) and epoch == lr_steps[lr_counter]:
            trainer.set_learning_rate(trainer.learning_rate*lr_factor)
            lr_counter += 1
        for i, batch in enumerate(train_data):
            tic = time.time()
            data = gluon.utils.split_and_load(batch[0], ctx_list=[context], batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=[context], batch_axis=0)
            weight = gluon.utils.split_and_load(batch[2], ctx_list=[context], batch_axis=0)
            with ag.record():
                outputs = [net(X) for X in data]
                loss = [L(yhat, y, w) for yhat, y, w in zip(outputs, label, weight)]
            for l in loss:
                l.backward()
            trainer.step(BATCH_SIZE)
            train_loss = sum([l.mean().asscalar() for l in loss]) / len(loss)
            print('[Epoch %d] batch_num: %d | learn_rate: %.5f | loss: %.8f | time: %.1f' %
                (epoch, i, trainer.learning_rate, train_loss, time.time() - tic))
        _, val_acc = test(net, val_data, context)
        mxnet_demo.demo(TEST_PATH, IMG_W, IMG_H, net, context)
        print("============================Val acc: %.5f============================"%val_acc)
        net.export('model/Ultralight-Nano-SimplePose_%.5f'%val_acc, epoch=epoch)
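        # Note: HybridBlock.export writes '<prefix>-symbol.json' plus
        # '<prefix>-NNNN.params' and requires the network to have been
        # hybridized and run on at least one batch beforehand.
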
def test(net, val_data, context):
    metric = HeatmapAccuracy()
    for i, batch in enumerate(val_data):
        data = gluon.utils.split_and_load(batch[0], ctx_list=[context], batch_axis=0)
        label = gluon.utils.split_and_load(batch[1], ctx_list=[context], batch_axis=0)
        weight = gluon.utils.split_and_load(batch[2], ctx_list=[context], batch_axis=0)
        outputs = [net(X) for X in data]
        metric.update(label, outputs)
    return metric.get()
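
#############################################################################
# The two functions above assume the following imports; ``posenet``,
# ``mxnet_demo``, ``data_loader``, and ``HeatmapAccuracy`` are helpers from
# the surrounding repository rather than MXNet itself:
#
#     import time
#     import mxnet as mx
#     from mxnet import gluon, autograd as ag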
def train(ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    if opt.use_pretrained_base:
        net.deconv_layers.initialize(ctx=ctx)
        net.final_layer.initialize(ctx=ctx)
    else:
        net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

    trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)

    L = gluon.loss.L2Loss()
    metric = HeatmapAccuracy()

    best_val_score = 1

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)

    for epoch in range(opt.num_epochs):
        loss_val = 0
        tic = time.time()
        btic = time.time()
        metric.reset()

        for i, batch in enumerate(train_data):
            data, label, weight, imgid = train_batch_fn(batch, ctx)

            with ag.record():
                outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                loss = [nd.cast(L(nd.cast(yhat, 'float32'), y, w), opt.dtype)
                        for yhat, y, w in zip(outputs, label, weight)]
            ag.backward(loss)
            trainer.step(batch_size)

            metric.update(label, outputs)

            loss_val += sum([l.mean().asscalar() for l in loss]) / num_gpus
            if opt.log_interval and not (i+1)%opt.log_interval:
                metric_name, metric_score = metric.get()
                logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\tloss=%f\tlr=%f\t%s=%.3f'%(
                             epoch, i, batch_size*opt.log_interval/(time.time()-btic),
                             loss_val / (i+1), trainer.learning_rate, metric_name, metric_score))
                btic = time.time()

        time_elapsed = time.time() - tic
        logger.info('Epoch[%d]\t\tSpeed: %d samples/sec over %d secs\tloss=%f\n'%(
                     epoch, int(i*batch_size / time_elapsed), int(time_elapsed), loss_val / (i+1)))
        if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
            net.save_parameters('%s/%s-%d.params'%(save_dir, model_name, epoch))
            trainer.save_states('%s/%s-%d.states'%(save_dir, model_name, epoch))

    if save_frequency and save_dir:
        net.save_parameters('%s/%s-%d.params'%(save_dir, model_name, opt.num_epochs-1))
        trainer.save_states('%s/%s-%d.states'%(save_dir, model_name, opt.num_epochs-1))

    return net
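
# Note: ``ag.backward(loss)`` above is ``mx.autograd.backward``, which accepts
# a list of loss heads and computes all of their gradients in one call; it is
# equivalent to the per-head ``for l in loss: l.backward()`` loop used by the
# other examples in this listing.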
Example #4
def train(ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    if opt.use_pretrained_base:
        net.deconv_layers.initialize(ctx=ctx)
        net.final_layer.initialize(ctx=ctx)
    else:
        net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

    trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)

    L = gluon.loss.L2Loss()
    metric = HeatmapAccuracy()

    best_val_score = 1

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)

    for epoch in range(opt.num_epochs):
        loss_val = 0
        tic = time.time()
        btic = time.time()
        metric.reset()

        for i, batch in enumerate(train_data):
            data, label, weight, imgid = train_batch_fn(batch, ctx)

            with ag.record():
                outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                loss = [nd.cast(L(nd.cast(yhat, 'float32'), y, w), opt.dtype)
                        for yhat, y, w in zip(outputs, label, weight)]
            for l in loss:
                l.backward()
            trainer.step(batch_size)

            metric.update(label, outputs)

            loss_val += sum([l.mean().asscalar() for l in loss]) / num_gpus
            if opt.log_interval and not (i+1)%opt.log_interval:
                metric_name, metric_score = metric.get()
                logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\tloss=%f\tlr=%f\t%s=%.3f'%(
                             epoch, i, batch_size*opt.log_interval/(time.time()-btic),
                             loss_val / (i+1), trainer.learning_rate, metric_name, metric_score))
                btic = time.time()

        time_elapsed = time.time() - tic
        logger.info('Epoch[%d]\t\tSpeed: %d samples/sec over %d secs\tloss=%f\n'%(
                     epoch, int(i*batch_size / time_elapsed), int(time_elapsed), loss_val / (i+1)))
        if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
            net.save_parameters('%s/%s-%d.params'%(save_dir, model_name, epoch))
            trainer.save_states('%s/%s-%d.states'%(save_dir, model_name, epoch))

    if save_frequency and save_dir:
        net.save_parameters('%s/%s-%d.params'%(save_dir, model_name, opt.num_epochs-1))
        trainer.save_states('%s/%s-%d.states'%(save_dir, model_name, opt.num_epochs-1))

    return net
Example #5
#############################################################################
#
#     For this model we use ``adam`` as the optimizer.

trainer = gluon.Trainer(net.collect_params(), 'adam',
                        {'lr_scheduler': lr_scheduler})

#############################################################################
#
# - Metric
#
#     The metric for this model is called heatmap accuracy: it compares the
#     predicted and ground-truth keypoint heatmaps and checks whether the
#     centers of the Gaussian peaks fall within a certain distance of each
#     other.

metric = HeatmapAccuracy()
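
#############################################################################
#
#     As a rough sketch of the idea (an illustration only, not gluoncv's
#     actual ``HeatmapAccuracy`` implementation, which normalizes and masks
#     joints differently), the check reduces to comparing the peak locations
#     of the two heatmaps:

import numpy as np

def heatmap_match_rate(pred, gt, thr=0.5):
    """Fraction of joints whose predicted peak lies near the true peak.

    ``pred`` and ``gt`` are arrays of shape (batch, num_joints, H, W).
    """
    b, j, h, w = pred.shape
    p = pred.reshape(b, j, -1).argmax(axis=2)
    g = gt.reshape(b, j, -1).argmax(axis=2)
    # Recover (row, col) peak coordinates from the flattened argmax.
    py, px = p // w, p % w
    gy, gx = g // w, g % w
    # Normalize peak distances by a tenth of the heatmap size.
    dist = np.sqrt(((py - gy) / (h / 10.0)) ** 2 + ((px - gx) / (w / 10.0)) ** 2)
    return float((dist < thr).mean())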

#############################################################################
# Training Loop
# -------------
#
# Now that we have all the necessary building blocks, we can put them together and start training.
#

net.hybridize(static_alloc=True, static_shape=True)
for epoch in range(1):
    metric.reset()

    for i, batch in enumerate(train_data):
        if i > 0:
            break
Example #6
def train(ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]

    trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)

    L = gluon.loss.L2Loss(weight=2.0)
    metric = HeatmapAccuracy()

    best_ap = 0

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)

    for epoch in range(opt.num_epochs):
        loss_val = 0
        tic = time.time()
        btic = time.time()
        metric.reset()

        train_data_desc = tqdm(train_data, dynamic_ncols=True)
        for i, batch in enumerate(train_data_desc):
            data, label, weight, imgid = train_batch_fn(batch, ctx)

            with ag.record():
                outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                loss = [
                    nd.cast(L(nd.cast(yhat, 'float32'), y, w), opt.dtype)
                    for yhat, y, w in zip(outputs, label, weight)
                ]
            ag.backward(loss)
            trainer.step(batch_size)

            metric.update(label, outputs)

            loss_val += sum([l.mean().asscalar() for l in loss]) / num_gpus
            if opt.log_interval and not (i + 1) % opt.log_interval:
                metric_name, metric_score = metric.get()
                logger.info(
                    'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\tloss=%f\tlr=%f\t%s=%.3f'
                    % (epoch, i, batch_size * opt.log_interval /
                       (time.time() - btic), loss_val / (i + 1),
                       trainer.learning_rate, metric_name, metric_score))
                btic = time.time()

        time_elapsed = time.time() - tic
        logger.info(
            'Epoch[%d]\t\tSpeed: %d samples/sec over %d secs\tloss=%f\n' %
            (epoch, int(i * batch_size / time_elapsed), int(time_elapsed),
             loss_val / (i + 1)))
        if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
            net.save_parameters('%s/%s-%d.params' %
                                (save_dir, model_name, epoch))
            trainer.save_states('%s/%s-%d.states' %
                                (save_dir, model_name, epoch))
        if (epoch + 1) % 2 == 0:
            res = validate(val_data, val_dataset, net, context, opt)[0]
            logger.info(res)
            if res['AP'] > best_ap:
                best_ap = res['AP']
                net.save_parameters(
                    f'{save_dir}/best-{round(best_ap, 3)}.params')
                if os.path.islink(f'{save_dir}/final.params'):
                    os.remove(f'{save_dir}/final.params')
                os.symlink(f'./best-{round(best_ap, 3)}.params',
                           f'{save_dir}/final.params')

    if save_frequency and save_dir:
        net.save_parameters('%s/%s-%d.params' %
                            (save_dir, model_name, opt.num_epochs - 1))
        trainer.save_states('%s/%s-%d.states' %
                            (save_dir, model_name, opt.num_epochs - 1))

    return net
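
# Design note: the example above keeps ``final.params`` as a relative symlink
# to the best checkpoint so far, so downstream tooling can always load one
# fixed path while training continues to improve the best AP.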
Example #7
#############################################################################
# 
#     For this model we use ``adam`` as the optimizer.


trainer = gluon.Trainer(net.collect_params(), 'adam', {'lr_scheduler': lr_scheduler})

#############################################################################
#
# - Metric
#
#     The metric for this model is called heatmap accuracy: it compares the
#     predicted and ground-truth keypoint heatmaps and checks whether the
#     centers of the Gaussian peaks fall within a certain distance of each
#     other.

metric = HeatmapAccuracy()


#############################################################################
# Training Loop
# -------------
#
# Now that we have all the necessary building blocks, we can put them together and start training.
#

net.hybridize(static_alloc=True, static_shape=True)
for epoch in range(1):
    metric.reset()

    for i, batch in enumerate(train_data):
        if i > 0:
            break
Example #8
def train(opt):

    batch_size = opt.batch_size
    num_joints = opt.num_joints

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    model_name = opt.model

    kwargs = {
        'ctx': ctx,
        'num_joints': num_joints,
        'pretrained': opt.use_pretrained,
        'pretrained_base': opt.use_pretrained_base,
        'pretrained_ctx': ctx
    }

    net = get_model(model_name, **kwargs)
    net.cast(opt.dtype)

    input_size = [int(i) for i in opt.input_size.split(',')]
    train_dataset, train_data, train_batch_fn = get_data_loader(
        opt, batch_size, num_workers, input_size)

    num_training_samples = len(train_dataset)
    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(
            range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // batch_size
    lr_scheduler = LRSequential([
        LRScheduler('linear',
                    base_lr=0,
                    target_lr=opt.lr,
                    nepochs=opt.warmup_epochs,
                    iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode,
                    base_lr=opt.lr,
                    target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay,
                    power=2)
    ])
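    # The composed schedule warms up linearly from 0 to opt.lr over the warmup
    # epochs, then decays toward 0 under opt.lr_mode, stepping at
    # lr_decay_epoch. MXNet schedulers are callable with the global update
    # count, so a quick (hypothetical) sanity check would be:
    #     for it in (0, num_batches, 10 * num_batches):
    #         print(it, lr_scheduler(it))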

    # optimizer = 'sgd'
    # optimizer_params = {'wd': opt.wd, 'momentum': 0.9, 'lr_scheduler': lr_scheduler}
    optimizer = 'adam'
    optimizer_params = {'wd': opt.wd, 'lr_scheduler': lr_scheduler}
    if opt.dtype != 'float32':
        optimizer_params['multi_precision'] = True

    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    if isinstance(ctx, mx.Context):
        ctx = [ctx]

    if opt.use_pretrained_base:
        if model_name.startswith('simple'):
            net.deconv_layers.initialize(ctx=ctx)
            net.final_layer.initialize(ctx=ctx)
        elif model_name.startswith('mobile'):
            net.upsampling.initialize(ctx=ctx)
    else:
        net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

    trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)

    L = gluon.loss.L2Loss()
    metric = HeatmapAccuracy()

    best_val_score = 1

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=True)

    for epoch in range(opt.num_epochs):
        loss_val = 0
        tic = time.time()
        btic = time.time()
        metric.reset()

        for i, batch in enumerate(train_data):
            data, label, weight, imgid = train_batch_fn(batch, ctx)

            with ag.record():
                outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                loss = [
                    nd.cast(L(nd.cast(yhat, 'float32'), y, w), opt.dtype)
                    for yhat, y, w in zip(outputs, label, weight)
                ]
            ag.backward(loss)
            trainer.step(batch_size)

            metric.update(label, outputs)

            loss_val += sum([l.mean().asscalar() for l in loss]) / num_gpus
            if opt.log_interval and not (i + 1) % opt.log_interval:
                metric_name, metric_score = metric.get()
                logger.info(
                    'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\tloss=%f\tlr=%f\t%s=%.3f'
                    % (epoch, i, batch_size * opt.log_interval /
                       (time.time() - btic), loss_val / (i + 1),
                       trainer.learning_rate, metric_name, metric_score))
                btic = time.time()

        time_elapsed = time.time() - tic
        logger.info(
            'Epoch[%d]\t\tSpeed: %d samples/sec over %d secs\tloss=%f\n' %
            (epoch, int(i * batch_size / time_elapsed), int(time_elapsed),
             loss_val / (i + 1)))
        if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
            net.save_parameters('%s/%s-%d.params' %
                                (save_dir, model_name, epoch))
            trainer.save_states('%s/%s-%d.states' %
                                (save_dir, model_name, epoch))

    if save_frequency and save_dir:
        net.save_parameters('%s/%s-%d.params' %
                            (save_dir, model_name, opt.num_epochs - 1))
        trainer.save_states('%s/%s-%d.states' %
                            (save_dir, model_name, opt.num_epochs - 1))

    return net
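
#############################################################################
# A hypothetical entry point for the example above; a real script would build
# ``opt`` with argparse and configure ``logger`` first (``parse_args`` below
# is an assumed helper, not shown in this listing):

if __name__ == '__main__':
    opt = parse_args()
    net = train(opt)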