Example #1
File: utils.py Project: djndl1/CSNotes
def get_folder_data(train_path, val_path, data_shape, batch_size, num_workers=os.cpu_count()):
    train_dataset = ImageFolderDataset(train_path)
    val_dataset = ImageFolderDataset(val_path)

    train_transformer = gluon.data.vision.transforms.Compose([
        transforms.RandomFlipLeftRight(),
        transforms.RandomResizedCrop(data_shape, scale=(0.5, 1.0)),
        transforms.RandomBrightness(0.5),
        transforms.RandomHue(0.1),
        transforms.Resize(data_shape),
        transforms.ToTensor()
    ])
    val_transformer = gluon.data.vision.transforms.Compose([
        transforms.Resize(data_shape),
        transforms.ToTensor()
    ])

    train_dataloader = data.DataLoader(train_dataset.transform_first(train_transformer),
                                       batch_size=batch_size, shuffle=True, last_batch='rollover',
                                       num_workers=num_workers)
    val_dataloader = data.DataLoader(val_dataset.transform_first(val_transformer),
                                     batch_size=batch_size, shuffle=True, last_batch='rollover',
                                     num_workers=num_workers)

    return train_dataloader, val_dataloader
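
# A minimal usage sketch for get_folder_data defined above. The directory paths
# and batch size are hypothetical placeholders; the gluon/transforms imports of
# the original file are assumed to be in scope.
train_loader, val_loader = get_folder_data('data/train', 'data/val',
                                           data_shape=224, batch_size=32)
for images, labels in train_loader:
    print(images.shape, labels.shape)  # e.g. (32, 3, 224, 224) (32,)
    break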
Example #2
    def __init__(self, use_float16=False):
        self._transform_test = transforms.Compose([transforms.ToTensor()])

        self._transform_train = transforms.Compose([
            transforms.RandomBrightness(0.3),
            transforms.RandomContrast(0.3),
            transforms.RandomSaturation(0.3),
            transforms.RandomFlipLeftRight(),
            transforms.ToTensor()
        ])
        self.use_float16 = use_float16
Example #3
def test_transformer():
    from mxnet.gluon.data.vision import transforms

    transform = transforms.Compose([
        transforms.Resize(300),
        transforms.CenterCrop(256),
        transforms.RandomResizedCrop(224),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(0.1, 0.1, 0.1, 0.1),
        transforms.RandomBrightness(0.1),
        transforms.RandomContrast(0.1),
        transforms.RandomSaturation(0.1),
        transforms.RandomHue(0.1),
        transforms.RandomLighting(0.1),
        transforms.ToTensor(),
        transforms.Normalize([0, 0, 0], [1, 1, 1])])

    transform(mx.nd.ones((245, 480, 3), dtype='uint8')).wait_to_read()
Example #4
logger = logging.getLogger('')
logger.setLevel(logging.INFO)
logger.addHandler(filehandler)
logger.addHandler(streamhandler)

logger.info(opt)
if opt.dataset == 'emore' and opt.batch_size < 512:
    logger.info("Warning: If you train a model on emore with batch size < 512 may lead to not converge."
                "You may try a smaller dataset.")

transform_test = transforms.Compose([
    transforms.ToTensor()
])

_transform_train = transforms.Compose([
    transforms.RandomBrightness(0.3),
    transforms.RandomContrast(0.3),
    transforms.RandomSaturation(0.3),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor()
])


def transform_train(data, label):
    im = _transform_train(data)
    return im, label
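
# A minimal sketch (not part of the original file) of how a (data, label) transform
# such as transform_train above is typically attached to a Gluon dataset:
# Dataset.transform() unpacks each (data, label) sample into the function, whereas
# transform_first() would pass only the image. Path and batch size are placeholders.
example_dataset = gluon.data.vision.ImageFolderDataset('data/train').transform(transform_train)
example_loader = gluon.data.DataLoader(example_dataset, batch_size=64, shuffle=True)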


def inf_train_gen(loader):
    # Cycle over the DataLoader forever, yielding one batch at a time.
    while True:
        for batch in loader:
            yield batch
Example #5
def train():
    logging.info('Start Training for Task: %s\n' % (task))

    # Initialize the net with pretrained model
    pretrained_net = gluon.model_zoo.vision.get_model(model_name, pretrained=True)

    finetune_net = gluon.model_zoo.vision.get_model(model_name, classes=task_num_class)
    finetune_net.features = pretrained_net.features
    finetune_net.output.initialize(init.Xavier(), ctx = ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    train_transform = transforms.Compose([
        transforms.Resize(input_scale),
        #transforms.RandomResizedCrop(448,scale=(0.76, 1.0),ratio=(0.999, 1.001)),
        transforms.RandomFlipLeftRight(),
        transforms.RandomBrightness(0.2),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    train_dataset = gluon.data.vision.ImageFolderDataset(os.path.join('.','train_valid_allset', task, 'train'))
    train_data = gluon.data.DataLoader(train_dataset.transform_first(train_transform),
        batch_size=batch_size, shuffle=True, num_workers=num_workers, last_batch='discard')


    val_transform = transforms.Compose([
        transforms.Resize(input_scale),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    val_dataset = gluon.data.vision.ImageFolderDataset(os.path.join('.','train_valid_allset', task, 'val'))
    val_data = gluon.data.DataLoader(val_dataset.transform_first(val_transform),
        batch_size=batch_size, shuffle=False, num_workers = num_workers, last_batch='discard')

    trainer = gluon.Trainer(finetune_net.collect_params(), 'adam', {
        'learning_rate': lr})
    metric = mx.metric.Accuracy()
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    lr_counter = 0
    num_batch = len(train_data)

    # Start Training
    best_AP = 0
    best_acc = 0
    for epoch in range(epochs):
        if epoch == lr_steps[lr_counter]:
            finetune_net.collect_params().load(best_path, ctx= ctx)
            trainer.set_learning_rate(trainer.learning_rate*lr_factor)
            lr_counter += 1

        tic = time.time()
        train_loss = 0
        metric.reset()
        AP = 0.
        AP_cnt = 0

        for i, batch in enumerate(train_data):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            for l in loss:
                l.backward()

            trainer.step(batch_size)
            train_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)

            metric.update(label, outputs)
            #ap, cnt = calculate_ap(label, outputs)
            #AP += ap
            #AP_cnt += cnt
            #progressbar(i, num_batch-1)

        #train_map = AP / AP_cnt
        _, train_acc = metric.get()
        train_loss /= num_batch

        this_AP, val_acc, val_loss = validate(finetune_net, val_data, ctx)

        logging.info('[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f, mAP: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f' %
                 (epoch, train_acc, train_loss, val_acc, this_AP, val_loss, time.time() - tic, trainer.learning_rate))
        f_val.writelines('[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f, mAP: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f\n' %
                 (epoch, train_acc, train_loss, val_acc, this_AP, val_loss, time.time() - tic, trainer.learning_rate))
        if val_acc > best_acc:
            best_AP = this_AP
            best_acc = val_acc
            best_path = os.path.join('.', 'models', '%s_%s_%s_%s_staging.params' % (task, model_name, epoch, best_acc))
            finetune_net.collect_params().save(best_path)

    logging.info('\n')
    finetune_net.collect_params().load(best_path, ctx= ctx)
    f_val.writelines('Best val acc is :[Epoch %d] Train-acc: %.3f, loss: %.3f | Best-val-acc: %.3f, Best-mAP: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f\n' %
         (epoch, train_acc, train_loss, best_acc, best_AP, val_loss, time.time() - tic, trainer.learning_rate))
    return finetune_net
Example #6
max_aspect_ratio = 4.0 / 3.0
min_aspect_ratio = 3.0 / 4.0
max_random_area = 1
min_random_area = 0.08
jitter_param = 0.4
lighting_param = 0.1

transform_train = transforms.Compose([
    # transforms.RandomResizedCrop(resize,
    #                              scale=(min_random_area, max_random_area),
    #                              ratio=(min_aspect_ratio, max_aspect_ratio)),

    # Randomly flip the image horizontally
    transforms.RandomFlipLeftRight(),

    transforms.RandomBrightness(brightness=jitter_param),
    transforms.RandomSaturation(saturation=jitter_param),
    transforms.RandomHue(hue=jitter_param),

    transforms.RandomLighting(lighting_param),

    # Pad the image by 4 pixels on each side (to 40x40), then randomly crop a 32x32 patch
    gcv_transforms.RandomCrop(32, pad=4),

    # Transpose the image from height*width*num_channels to num_channels*height*width
    # and map values from [0, 255] to [0, 1]
    transforms.ToTensor(),

    # Normalize the image with mean and standard deviation calculated across all images
    transforms.Normalize(mean_rgb, std_rgb),
])
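
# A minimal sketch (not from the original file) of how the Compose pipeline above
# is typically consumed. It assumes gluon is imported and that mean_rgb/std_rgb
# were defined earlier (e.g. the usual CIFAR-10 statistics); batch size and
# num_workers are placeholders.
train_data = gluon.data.DataLoader(
    gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
    batch_size=128, shuffle=True, last_batch='discard', num_workers=4)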
Example #7
def train():
    if config.restart_training:
        shutil.rmtree(config.output_dir, ignore_errors=True)
    if config.output_dir is None:
        config.output_dir = 'output'
    if not os.path.exists(config.output_dir):
        os.makedirs(config.output_dir)
    logger = setup_logger(os.path.join(config.output_dir, 'train_log'))
    logger.info('train with gpu %s and mxnet %s' %
                (config.gpu_id, mx.__version__))

    ctx = mx.gpu(config.gpu_id)
    # Set the random seeds
    mx.random.seed(2)
    mx.random.seed(2, ctx=ctx)

    train_transforms = transforms.Compose(
        [transforms.RandomBrightness(0.5),
         transforms.ToTensor()])
    train_dataset = ImageDataset(config.trainfile,
                                 (config.img_h, config.img_w),
                                 3,
                                 80,
                                 config.alphabet,
                                 phase='train')
    train_data_loader = DataLoader(
        train_dataset.transform_first(train_transforms),
        config.train_batch_size,
        shuffle=True,
        last_batch='keep',
        num_workers=config.workers)
    test_dataset = ImageDataset(config.testfile, (config.img_h, config.img_w),
                                3,
                                80,
                                config.alphabet,
                                phase='test')
    test_data_loader = DataLoader(test_dataset.transform_first(
        transforms.ToTensor()),
                                  config.eval_batch_size,
                                  shuffle=True,
                                  last_batch='keep',
                                  num_workers=config.workers)
    net = CRNN(len(config.alphabet), hidden_size=config.nh)
    net.hybridize()
    if not config.restart_training and config.checkpoint != '':
        logger.info('load pretrained net from {}'.format(config.checkpoint))
        net.load_parameters(config.checkpoint, ctx=ctx)
    else:
        net.initialize(ctx=ctx)

    criterion = gluon.loss.CTCLoss()

    all_step = len(train_data_loader)
    logger.info('each epoch contains {} steps'.format(all_step))
    schedule = mx.lr_scheduler.FactorScheduler(step=config.lr_decay_step *
                                               all_step,
                                               factor=config.lr_decay,
                                               stop_factor_lr=config.end_lr)
    # schedule = mx.lr_scheduler.MultiFactorScheduler(step=[15 * all_step, 30 * all_step, 60 * all_step,80 * all_step],
    #                                                 factor=0.1)
    adam_optimizer = mx.optimizer.Adam(learning_rate=config.lr,
                                       lr_scheduler=schedule)
    trainer = gluon.Trainer(net.collect_params(), optimizer=adam_optimizer)

    sw = SummaryWriter(logdir=config.output_dir)
    for epoch in range(config.start_epoch, config.end_epoch):
        loss = .0
        train_acc = .0
        tick = time.time()
        cur_step = 0
        for i, (data, label) in enumerate(train_data_loader):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)

            with autograd.record():
                output = net(data)
                loss_ctc = criterion(output, label)
            loss_ctc.backward()
            trainer.step(data.shape[0])

            loss_c = loss_ctc.mean()
            cur_step = epoch * all_step + i
            sw.add_scalar(tag='ctc_loss',
                          value=loss_c.asscalar(),
                          global_step=cur_step // 2)
            sw.add_scalar(tag='lr',
                          value=trainer.learning_rate,
                          global_step=cur_step // 2)
            loss += loss_c
            acc = accuracy(output, label, config.alphabet)
            train_acc += acc
            if (i + 1) % config.display_interval == 0:
                acc /= len(label)
                sw.add_scalar(tag='train_acc', value=acc, global_step=cur_step)
                batch_time = time.time() - tick
                logger.info(
                    '[{}/{}], [{}/{}], step: {}, Speed: {:.3f} samples/sec, ctc loss: {:.4f}, acc: {:.4f}, lr: {},'
                    ' time: {:.4f} s'.format(
                        epoch, config.end_epoch, i, all_step, cur_step,
                        config.display_interval * config.train_batch_size /
                        batch_time,
                        loss.asscalar() / config.display_interval, acc,
                        trainer.learning_rate, batch_time))
                loss = .0
                tick = time.time()
                nd.waitall()
        if epoch == 0:
            sw.add_graph(net)
        logger.info('start val ....')
        train_acc /= train_dataset.__len__()
        validation_accuracy = evaluate_accuracy(
            net, test_data_loader, ctx,
            config.alphabet) / test_dataset.__len__()
        sw.add_scalar(tag='val_acc',
                      value=validation_accuracy,
                      global_step=cur_step)
        logger.info("Epoch {},train_acc {:.4f}, val_acc {:.4f}".format(
            epoch, train_acc, validation_accuracy))
        net.save_parameters("{}/{}_{:.4f}_{:.4f}.params".format(
            config.output_dir, epoch, train_acc, validation_accuracy))
    sw.close()
Example #8
def train():
    logging.info('Start Training for Task: %s\n' % (task))

    # Initialize the net with pretrained model
    pretrained_net = gluon.model_zoo.vision.get_model(model_name,
                                                      pretrained=True)

    finetune_net = gluon.model_zoo.vision.get_model(model_name,
                                                    classes=task_num_class)
    finetune_net.features = pretrained_net.features
    finetune_net.output.initialize(init.Xavier(), ctx=ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # Carefully set the 'scale' parameter to enable multi-scale training and multi-scale testing
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(448,
                                     scale=(0.76, 1.0),
                                     ratio=(0.999, 1.001)),
        transforms.RandomFlipLeftRight(),
        transforms.RandomBrightness(0.20),
        #transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
        #                             saturation=jitter_param),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    train_dataset = my_customdataset.my_custom_dataset(
        imgroot=os.path.join(".", 'train_valid_allset', task, 'train'),
        labelmasterpath='label_master.csv')
    train_data = gluon.data.DataLoader(
        train_dataset.transform_first(train_transform),
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        last_batch='discard')

    val_transform = transforms.Compose([
        transforms.Resize(480),
        transforms.CenterCrop(448),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    val_dataset = my_customdataset.my_custom_dataset(
        imgroot=os.path.join(".", 'train_valid_allset', task, 'val'),
        labelmasterpath='label_master.csv')
    val_data = gluon.data.DataLoader(
        val_dataset.transform_first(val_transform),
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers)

    # Define the Trainer; use Adam so the model converges quickly
    trainer = gluon.Trainer(finetune_net.collect_params(), 'adam',
                            {'learning_rate': lr})
    metric = mx.metric.Accuracy()
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    lr_counter = 0
    num_batch = len(train_data)

    # Start Training
    best_AP = 0
    best_acc = 0
    for epoch in range(epochs):
        train_acc = 0.
        #### Load the best model when moving to the next training stage
        if epoch == lr_steps[lr_counter]:
            finetune_net.collect_params().load(best_path, ctx=ctx)
            trainer.set_learning_rate(trainer.learning_rate * lr_factor)
            lr_counter += 1

        tic = time.time()
        train_loss = 0
        metric.reset()
        AP = 0.
        AP_cnt = 0

        for i, batch in enumerate(train_data):
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0,
                                              even_split=False)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0,
                                               even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = []
                ###### Handle 'm' label by soft-softmax function ######
                for yhat, y in zip(outputs[0], label[0]):
                    loss_1 = 0
                    if y[1] == 99:  # only the main label, e.g. [4, 99, 99, 99, 99]
                        loss_1 += L(yhat, y[0])
                    elif y[2] == 99:  # one 'm' label, e.g. [4, 1, 99, 99, 99]
                        loss_1 = 0.8 * L(yhat, y[0]) + 0.2 * L(yhat, y[1])
                    elif y[3] == 99:  # two 'm' labels, e.g. [4, 1, 3, 99, 99]
                        loss_1 = 0.7 * L(yhat, y[0]) + 0.15 * L(
                            yhat, y[1]) + 0.15 * L(yhat, y[2])
                    else:  # three or more 'm' labels, e.g. [4, 1, 3, 2, 99]
                        loss_1 = 0.6 * L(yhat, y[0]) + 0.13 * L(
                            yhat, y[1]) + 0.13 * L(yhat, y[2]) + 0.13 * L(
                                yhat, y[3])

                    loss += [loss_1]

                #loss = [L(yhat, y) for yhat, y in zip(outputs, label)
            # for l in loss:
            #     l.backward()
            ag.backward(loss)  # for soft-softmax

            trainer.step(batch_size)
            train_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)
            #train_acc += accuracy(outputs, label)
            metric.update([label[0][:, 0]], outputs)
            #ap, cnt = calculate_ap(label, outputs)
            #AP += ap
            #AP_cnt += cnt
            #progressbar(i, num_batch-1)

        #train_map = AP / AP_cnt
        _, train_acc = metric.get()
        train_loss /= num_batch

        val_acc, val_loss = validate(finetune_net, val_data, ctx)

        logging.info(
            '[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f'
            % (epoch, train_acc, train_loss, val_acc, val_loss,
               time.time() - tic, trainer.learning_rate))
        f_val.writelines(
            '[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f\n'
            % (epoch, train_acc, train_loss, val_acc, val_loss,
               time.time() - tic, trainer.learning_rate))
        ### Save the best model every stage
        if val_acc > best_acc:
            #best_AP = this_AP
            best_acc = val_acc
            if not os.path.exists(os.path.join('.', 'models')):
                os.makedirs(os.path.join('.', 'models'))
            if not os.path.exists(
                    os.path.join(
                        '.', 'models', '%s_%s_%s_%s_staging.params' %
                        (task, model_name, epoch, best_acc))):
                f = open(
                    os.path.join(
                        '.', 'models', '%s_%s_%s_%s_staging.params' %
                        (task, model_name, epoch, best_acc)), 'w')
                f.close()
            best_path = os.path.join(
                '.', 'models', '%s_%s_%s_%s_staging.params' %
                (task, model_name, epoch, best_acc))
            finetune_net.collect_params().save(best_path)

    logging.info('\n')
    finetune_net.collect_params().load(best_path, ctx=ctx)
    f_val.writelines(
        'Best val acc is :[Epoch %d] Train-acc: %.3f, loss: %.3f | Best-val-acc: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f\n'
        % (epoch, train_acc, train_loss, best_acc, val_loss, time.time() - tic,
           trainer.learning_rate))
    return finetune_net
Example #9
def main():
    opt = parse_args()
    batch_size = opt.batch_size
    classes = 10

    log_dir = os.path.join(opt.save_dir, "logs")
    model_dir = os.path.join(opt.save_dir, "params")
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Init dataloader
    jitter_param = 0.4
    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.RandomBrightness(jitter_param),
        transforms.RandomColorJitter(jitter_param),
        transforms.RandomContrast(jitter_param),
        transforms.RandomSaturation(jitter_param),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])

    train_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
        batch_size=batch_size,
        shuffle=True,
        last_batch='discard',
        num_workers=opt.num_workers)

    val_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
        batch_size=batch_size,
        shuffle=False,
        num_workers=opt.num_workers)

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i)
               for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]

    lr_decay = opt.lr_decay
    lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')] + [np.inf]

    model_name = opt.model
    if model_name.startswith('cifar_wideresnet'):
        kwargs = {'classes': classes, 'drop_rate': opt.drop_rate}
    else:
        kwargs = {'classes': classes}
    net = get_model(model_name, **kwargs)

    if opt.resume_from:
        net.load_parameters(opt.resume_from, ctx=context)
    optimizer = 'nag'

    save_period = opt.save_period
    if opt.save_dir and save_period:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_period = 0

    def test(ctx, val_loader):
        metric = mx.metric.Accuracy()
        for i, batch in enumerate(val_loader):
            data = gluon.utils.split_and_load(batch[0],
                                              ctx_list=ctx,
                                              batch_axis=0)
            label = gluon.utils.split_and_load(batch[1],
                                               ctx_list=ctx,
                                               batch_axis=0)
            outputs = [net(X) for X in data]
            metric.update(label, outputs)
        return metric.get()

    def train(train_data, val_data, epochs, ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]

        net.hybridize()
        net.initialize(mx.init.Xavier(), ctx=ctx)
        net.forward(mx.nd.ones((1, 3, 30, 30), ctx=ctx[0]))
        with SummaryWriter(logdir=log_dir, verbose=False) as sw:
            sw.add_graph(net)

        trainer = gluon.Trainer(net.collect_params(), optimizer, {
            'learning_rate': opt.lr,
            'wd': opt.wd,
            'momentum': opt.momentum
        })
        metric = mx.metric.Accuracy()
        train_metric = mx.metric.Accuracy()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

        iteration = 0
        lr_decay_count = 0

        best_val_score = 0
        global_step = 0

        for epoch in range(epochs):
            tic = time.time()
            train_metric.reset()
            metric.reset()
            train_loss = 0
            num_batch = len(train_data)
            alpha = 1

            if epoch == lr_decay_epoch[lr_decay_count]:
                trainer.set_learning_rate(trainer.learning_rate * lr_decay)
                lr_decay_count += 1

            tbar = tqdm(train_data)

            for i, batch in enumerate(tbar):
                data = gluon.utils.split_and_load(batch[0],
                                                  ctx_list=ctx,
                                                  batch_axis=0)
                label = gluon.utils.split_and_load(batch[1],
                                                   ctx_list=ctx,
                                                   batch_axis=0)

                with ag.record():
                    output = [net(X) for X in data]
                    loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])

                train_metric.update(label, output)
                name, acc = train_metric.get()
                iteration += 1
                global_step += len(loss)

            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            name, val_acc = test(ctx, val_data)

            if val_acc > best_val_score:
                best_val_score = val_acc
                net.save_parameters('{}/{}-{}-{:04.3f}-best.params'.format(
                    model_dir, model_name, epoch, best_val_score))

            with SummaryWriter(logdir=log_dir, verbose=False) as sw:
                sw.add_scalar(tag="TrainLos",
                              value=train_loss,
                              global_step=global_step)
                sw.add_scalar(tag="TrainAcc",
                              value=acc,
                              global_step=global_step)
                sw.add_scalar(tag="ValAcc",
                              value=val_acc,
                              global_step=global_step)
                sw.add_graph(net)

            logging.info('[Epoch %d] train=%f val=%f loss=%f time: %f' %
                         (epoch, acc, val_acc, train_loss, time.time() - tic))

            if save_period and save_dir and (epoch + 1) % save_period == 0:
                net.save_parameters('{}/{}-{}.params'.format(
                    save_dir, model_name, epoch))

        if save_period and save_dir:
            net.save_parameters('{}/{}-{}.params'.format(
                save_dir, model_name, epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize()
    train(train_data, val_data, opt.num_epochs, context)
Example #10
lr_factor = 0.75

ctx = mx.gpu()
batch_size = 64

# data augment
jitter_param = 0.4
brightness = 0.1
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.RandomColorJitter(brightness=jitter_param,
                                 contrast=jitter_param,
                                 saturation=jitter_param),
    transforms.RandomBrightness(brightness=brightness),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# data loader
path = '../data/minc-2500'
train_path = os.path.join(path, 'train')
val_path = os.path.join(path, 'val')
test_path = os.path.join(path, 'test')
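
# A minimal sketch (not part of the original file) of how the transforms and paths
# above are typically wired into DataLoaders; it assumes gluon is imported as in the
# other examples, and num_workers is an assumed placeholder.
train_data = gluon.data.DataLoader(
    gluon.data.vision.ImageFolderDataset(train_path).transform_first(transform_train),
    batch_size=batch_size, shuffle=True, num_workers=4)
val_data = gluon.data.DataLoader(
    gluon.data.vision.ImageFolderDataset(val_path).transform_first(transform_test),
    batch_size=batch_size, shuffle=False, num_workers=4)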