def get_folder_data(train_path, val_path, data_shape, batch_size, num_workers=os.cpu_count()):
    train_dataset = ImageFolderDataset(train_path)
    val_dataset = ImageFolderDataset(val_path)
    train_transformer = gluon.data.vision.transforms.Compose([
        transforms.RandomFlipLeftRight(),
        transforms.RandomResizedCrop(data_shape, scale=(0.5, 1.0)),
        transforms.RandomBrightness(0.5),
        transforms.RandomHue(0.1),
        transforms.Resize(data_shape),
        transforms.ToTensor()
    ])
    val_transformer = gluon.data.vision.transforms.Compose([
        transforms.Resize(data_shape),
        transforms.ToTensor()
    ])
    train_dataloader = data.DataLoader(train_dataset.transform_first(train_transformer),
                                       batch_size=batch_size, shuffle=True,
                                       last_batch='rollover', num_workers=num_workers)
    val_dataloader = data.DataLoader(val_dataset.transform_first(val_transformer),
                                     batch_size=batch_size, shuffle=True,
                                     last_batch='rollover', num_workers=num_workers)
    return train_dataloader, val_dataloader
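# A hypothetical usage sketch for get_folder_data, listing the imports the snippet
# appears to rely on; the paths, data shape, and batch size below are placeholders.
import os
import mxnet as mx
from mxnet import gluon
from mxnet.gluon import data
from mxnet.gluon.data.vision import ImageFolderDataset, transforms

train_loader, val_loader = get_folder_data('data/train', 'data/val',
                                           data_shape=224, batch_size=32)
for images, labels in train_loader:
    print(images.shape, labels.shape)  # e.g. (32, 3, 224, 224) (32,)
    break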
def __init__(self, use_float16=False):
    self._transform_test = transforms.Compose([transforms.ToTensor()])
    self._transform_train = transforms.Compose([
        transforms.RandomBrightness(0.3),
        transforms.RandomContrast(0.3),
        transforms.RandomSaturation(0.3),
        transforms.RandomFlipLeftRight(),
        transforms.ToTensor()
    ])
    self.use_float16 = use_float16
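# A speculative sketch of how the same (unnamed) class might apply the two
# pipelines set up above; the method name, the train flag, and the float16 cast
# placement are assumptions, not part of the original snippet.
def __call__(self, img, label, train=True):
    out = self._transform_train(img) if train else self._transform_test(img)
    if self.use_float16:
        out = out.astype('float16')  # optional half-precision cast controlled by use_float16
    return out, label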
def test_transformer():
    from mxnet.gluon.data.vision import transforms
    transform = transforms.Compose([
        transforms.Resize(300),
        transforms.CenterCrop(256),
        transforms.RandomResizedCrop(224),
        transforms.RandomFlipLeftRight(),
        transforms.RandomColorJitter(0.1, 0.1, 0.1, 0.1),
        transforms.RandomBrightness(0.1),
        transforms.RandomContrast(0.1),
        transforms.RandomSaturation(0.1),
        transforms.RandomHue(0.1),
        transforms.RandomLighting(0.1),
        transforms.ToTensor(),
        transforms.Normalize([0, 0, 0], [1, 1, 1])
    ])
    transform(mx.nd.ones((245, 480, 3), dtype='uint8')).wait_to_read()
logger = logging.getLogger('')
logger.setLevel(logging.INFO)
logger.addHandler(filehandler)
logger.addHandler(streamhandler)
logger.info(opt)
if opt.dataset == 'emore' and opt.batch_size < 512:
    logger.info("Warning: training a model on emore with batch size < 512 may not converge. "
                "You may try a smaller dataset.")

transform_test = transforms.Compose([
    transforms.ToTensor()
])
_transform_train = transforms.Compose([
    transforms.RandomBrightness(0.3),
    transforms.RandomContrast(0.3),
    transforms.RandomSaturation(0.3),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor()
])

def transform_train(data, label):
    im = _transform_train(data)
    return im, label

def inf_train_gen(loader):
    while True:
        for batch in loader:
            yield batch
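# A hypothetical usage sketch for inf_train_gen, assuming a training dataset
# (train_set here is a placeholder) wrapped in a gluon DataLoader with the
# transform_train function defined above.
train_loader = gluon.data.DataLoader(train_set.transform(transform_train),
                                     batch_size=opt.batch_size, shuffle=True,
                                     num_workers=opt.num_workers)
batch_generator = inf_train_gen(train_loader)
data, label = next(batch_generator)  # yields batches indefinitely, restarting the loader each epoch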
def train():
    logging.info('Start Training for Task: %s\n' % (task))
    # Initialize the net with a pretrained model
    pretrained_net = gluon.model_zoo.vision.get_model(model_name, pretrained=True)
    finetune_net = gluon.model_zoo.vision.get_model(model_name, classes=task_num_class)
    finetune_net.features = pretrained_net.features
    finetune_net.output.initialize(init.Xavier(), ctx=ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    train_transform = transforms.Compose([
        transforms.Resize(input_scale),
        # transforms.RandomResizedCrop(448, scale=(0.76, 1.0), ratio=(0.999, 1.001)),
        transforms.RandomFlipLeftRight(),
        transforms.RandomBrightness(0.2),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    train_dataset = gluon.data.vision.ImageFolderDataset(
        os.path.join('.', 'train_valid_allset', task, 'train'))
    train_data = gluon.data.DataLoader(train_dataset.transform_first(train_transform),
                                       batch_size=batch_size, shuffle=True,
                                       num_workers=num_workers, last_batch='discard')

    val_transform = transforms.Compose([
        transforms.Resize(input_scale),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    val_dataset = gluon.data.vision.ImageFolderDataset(
        os.path.join('.', 'train_valid_allset', task, 'val'))
    val_data = gluon.data.DataLoader(val_dataset.transform_first(val_transform),
                                     batch_size=batch_size, shuffle=False,
                                     num_workers=num_workers, last_batch='discard')

    trainer = gluon.Trainer(finetune_net.collect_params(), 'adam', {'learning_rate': lr})
    metric = mx.metric.Accuracy()
    L = gluon.loss.SoftmaxCrossEntropyLoss()

    lr_counter = 0
    num_batch = len(train_data)

    # Start Training
    best_AP = 0
    best_acc = 0
    for epoch in range(epochs):
        if epoch == lr_steps[lr_counter]:
            finetune_net.collect_params().load(best_path, ctx=ctx)
            trainer.set_learning_rate(trainer.learning_rate * lr_factor)
            lr_counter += 1

        tic = time.time()
        train_loss = 0
        metric.reset()
        AP = 0.
        AP_cnt = 0
        for i, batch in enumerate(train_data):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            for l in loss:
                l.backward()
            trainer.step(batch_size)
            train_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)
            metric.update(label, outputs)
            # ap, cnt = calculate_ap(label, outputs)
            # AP += ap
            # AP_cnt += cnt
            # progressbar(i, num_batch-1)

        # train_map = AP / AP_cnt
        _, train_acc = metric.get()
        train_loss /= num_batch
        this_AP, val_acc, val_loss = validate(finetune_net, val_data, ctx)

        logging.info('[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f, mAP: %.3f, loss: %.3f | '
                     'time: %.1f | learning_rate %.6f' %
                     (epoch, train_acc, train_loss, val_acc, this_AP, val_loss,
                      time.time() - tic, trainer.learning_rate))
        f_val.writelines('[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f, mAP: %.3f, loss: %.3f | '
                         'time: %.1f | learning_rate %.6f\n' %
                         (epoch, train_acc, train_loss, val_acc, this_AP, val_loss,
                          time.time() - tic, trainer.learning_rate))

        if val_acc > best_acc:
            best_AP = this_AP
            best_acc = val_acc
            best_path = os.path.join('.', 'models', '%s_%s_%s_%s_staging.params' %
                                     (task, model_name, epoch, best_acc))
            finetune_net.collect_params().save(best_path)

    logging.info('\n')
    finetune_net.collect_params().load(best_path, ctx=ctx)
    f_val.writelines('Best val acc is :[Epoch %d] Train-acc: %.3f, loss: %.3f | Best-val-acc: %.3f, '
                     'Best-mAP: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f\n' %
                     (epoch, train_acc, train_loss, best_acc, best_AP, val_loss,
                      time.time() - tic, trainer.learning_rate))
    return finetune_net
max_aspect_ratio = 4.0 / 3.0
min_aspect_ratio = 3.0 / 4.0
max_random_area = 1
min_random_area = 0.08
jitter_param = 0.4
lighting_param = 0.1
transform_train = transforms.Compose([
    # transforms.RandomResizedCrop(resize,
    #                              scale=(min_random_area, max_random_area),
    #                              ratio=(min_aspect_ratio, max_aspect_ratio)),
    # Randomly flip the image horizontally
    transforms.RandomFlipLeftRight(),
    transforms.RandomBrightness(brightness=jitter_param),
    transforms.RandomSaturation(saturation=jitter_param),
    transforms.RandomHue(hue=jitter_param),
    transforms.RandomLighting(lighting_param),
    # Pad the 32x32 image to 40x40, then randomly crop a 32x32 patch
    gcv_transforms.RandomCrop(32, pad=4),
    # Transpose the image from height*width*num_channels to num_channels*height*width
    # and map values from [0, 255] to [0, 1]
    transforms.ToTensor(),
    # Normalize the image with mean and standard deviation calculated across all images
    transforms.Normalize(mean_rgb, std_rgb),
])
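# A minimal sketch of how this pipeline is typically attached to a dataset,
# assuming the snippet targets CIFAR-10 (as the 32x32 crop suggests) and that
# mean_rgb / std_rgb were defined earlier; batch size and workers are illustrative.
train_data = gluon.data.DataLoader(
    gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
    batch_size=128, shuffle=True, last_batch='discard', num_workers=4)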
def train():
    if config.restart_training:
        shutil.rmtree(config.output_dir, ignore_errors=True)
    if config.output_dir is None:
        config.output_dir = 'output'
    if not os.path.exists(config.output_dir):
        os.makedirs(config.output_dir)

    logger = setup_logger(os.path.join(config.output_dir, 'train_log'))
    logger.info('train with gpu %s and mxnet %s' % (config.gpu_id, mx.__version__))

    ctx = mx.gpu(config.gpu_id)
    # Set the random seed
    mx.random.seed(2)
    mx.random.seed(2, ctx=ctx)

    train_transforms = transforms.Compose([
        transforms.RandomBrightness(0.5),
        transforms.ToTensor()
    ])
    train_dataset = ImageDataset(config.trainfile, (config.img_h, config.img_w), 3, 80,
                                 config.alphabet, phase='train')
    train_data_loader = DataLoader(train_dataset.transform_first(train_transforms),
                                   config.train_batch_size, shuffle=True,
                                   last_batch='keep', num_workers=config.workers)
    test_dataset = ImageDataset(config.testfile, (config.img_h, config.img_w), 3, 80,
                                config.alphabet, phase='test')
    test_data_loader = DataLoader(test_dataset.transform_first(transforms.ToTensor()),
                                  config.eval_batch_size, shuffle=True,
                                  last_batch='keep', num_workers=config.workers)

    net = CRNN(len(config.alphabet), hidden_size=config.nh)
    net.hybridize()
    if not config.restart_training and config.checkpoint != '':
        logger.info('load pretrained net from {}'.format(config.checkpoint))
        net.load_parameters(config.checkpoint, ctx=ctx)
    else:
        net.initialize(ctx=ctx)

    criterion = gluon.loss.CTCLoss()

    all_step = len(train_data_loader)
    logger.info('each epoch contains {} steps'.format(all_step))
    schedule = mx.lr_scheduler.FactorScheduler(step=config.lr_decay_step * all_step,
                                               factor=config.lr_decay,
                                               stop_factor_lr=config.end_lr)
    # schedule = mx.lr_scheduler.MultiFactorScheduler(
    #     step=[15 * all_step, 30 * all_step, 60 * all_step, 80 * all_step], factor=0.1)
    adam_optimizer = mx.optimizer.Adam(learning_rate=config.lr, lr_scheduler=schedule)
    trainer = gluon.Trainer(net.collect_params(), optimizer=adam_optimizer)

    sw = SummaryWriter(logdir=config.output_dir)
    for epoch in range(config.start_epoch, config.end_epoch):
        loss = .0
        train_acc = .0
        tick = time.time()
        cur_step = 0
        for i, (data, label) in enumerate(train_data_loader):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data)
                loss_ctc = criterion(output, label)
            loss_ctc.backward()
            trainer.step(data.shape[0])

            loss_c = loss_ctc.mean()
            cur_step = epoch * all_step + i
            sw.add_scalar(tag='ctc_loss', value=loss_c.asscalar(), global_step=cur_step // 2)
            sw.add_scalar(tag='lr', value=trainer.learning_rate, global_step=cur_step // 2)
            loss += loss_c
            acc = accuracy(output, label, config.alphabet)
            train_acc += acc
            if (i + 1) % config.display_interval == 0:
                acc /= len(label)
                sw.add_scalar(tag='train_acc', value=acc, global_step=cur_step)
                batch_time = time.time() - tick
                logger.info('[{}/{}], [{}/{}], step: {}, Speed: {:.3f} samples/sec, '
                            'ctc loss: {:.4f}, acc: {:.4f}, lr: {}, time: {:.4f} s'.format(
                                epoch, config.end_epoch, i, all_step, cur_step,
                                config.display_interval * config.train_batch_size / batch_time,
                                loss.asscalar() / config.display_interval, acc,
                                trainer.learning_rate, batch_time))
                loss = .0
                tick = time.time()
                nd.waitall()

        if epoch == 0:
            sw.add_graph(net)
        logger.info('start val ....')
        train_acc /= train_dataset.__len__()
        validation_accuracy = evaluate_accuracy(net, test_data_loader, ctx,
                                                config.alphabet) / test_dataset.__len__()
        sw.add_scalar(tag='val_acc', value=validation_accuracy, global_step=cur_step)
        logger.info("Epoch {}, train_acc {:.4f}, val_acc {:.4f}".format(
            epoch, train_acc, validation_accuracy))
        net.save_parameters("{}/{}_{:.4f}_{:.4f}.params".format(
            config.output_dir, epoch, train_acc, validation_accuracy))
    sw.close()
def train():
    logging.info('Start Training for Task: %s\n' % (task))
    # Initialize the net with a pretrained model
    pretrained_net = gluon.model_zoo.vision.get_model(model_name, pretrained=True)
    finetune_net = gluon.model_zoo.vision.get_model(model_name, classes=task_num_class)
    finetune_net.features = pretrained_net.features
    finetune_net.output.initialize(init.Xavier(), ctx=ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # Carefully set the 'scale' parameter to enable multi-scale training and multi-scale testing
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(448, scale=(0.76, 1.0), ratio=(0.999, 1.001)),
        transforms.RandomFlipLeftRight(),
        transforms.RandomBrightness(0.20),
        # transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
        #                              saturation=jitter_param),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    train_dataset = my_customdataset.my_custom_dataset(
        imgroot=os.path.join(".", 'train_valid_allset', task, 'train'),
        labelmasterpath='label_master.csv')
    train_data = gluon.data.DataLoader(train_dataset.transform_first(train_transform),
                                       batch_size=batch_size, shuffle=True,
                                       num_workers=num_workers, last_batch='discard')

    val_transform = transforms.Compose([
        transforms.Resize(480),
        transforms.CenterCrop(448),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    val_dataset = my_customdataset.my_custom_dataset(
        imgroot=os.path.join(".", 'train_valid_allset', task, 'val'),
        labelmasterpath='label_master.csv')
    val_data = gluon.data.DataLoader(val_dataset.transform_first(val_transform),
                                     batch_size=batch_size, shuffle=False,
                                     num_workers=num_workers)

    # Define the trainer; use Adam to make the model converge quickly
    trainer = gluon.Trainer(finetune_net.collect_params(), 'adam', {'learning_rate': lr})
    metric = mx.metric.Accuracy()
    L = gluon.loss.SoftmaxCrossEntropyLoss()

    lr_counter = 0
    num_batch = len(train_data)

    # Start Training
    best_AP = 0
    best_acc = 0
    for epoch in range(epochs):
        train_acc = 0.
        # Load the best model when moving to the next training stage
        if epoch == lr_steps[lr_counter]:
            finetune_net.collect_params().load(best_path, ctx=ctx)
            trainer.set_learning_rate(trainer.learning_rate * lr_factor)
            lr_counter += 1

        tic = time.time()
        train_loss = 0
        metric.reset()
        AP = 0.
        AP_cnt = 0
        for i, batch in enumerate(train_data):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = []
                # Handle the 'm' labels with a soft-softmax weighting
                for yhat, y in zip(outputs[0], label[0]):
                    loss_1 = 0
                    if y[1] == 99:    # only one label, e.g. [4, 0, 0, 0, 0]
                        loss_1 += L(yhat, y[0])
                    elif y[2] == 99:  # one 'm' label, e.g. [4, 1, 0, 0, 0]
                        loss_1 = 0.8 * L(yhat, y[0]) + 0.2 * L(yhat, y[1])
                    elif y[3] == 99:  # two 'm' labels, e.g. [4, 1, 3, 0, 0]
                        loss_1 = 0.7 * L(yhat, y[0]) + 0.15 * L(yhat, y[1]) + 0.15 * L(yhat, y[2])
                    else:             # many 'm' labels, e.g. [4, 1, 3, 2, 0]
                        loss_1 = 0.6 * L(yhat, y[0]) + 0.13 * L(yhat, y[1]) + \
                                 0.13 * L(yhat, y[2]) + 0.13 * L(yhat, y[3])
                    loss += [loss_1]
                # loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            # for l in loss:
            #     l.backward()
            ag.backward(loss)  # for soft-softmax
            trainer.step(batch_size)
            train_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)
            # train_acc += accuracy(outputs, label)
            metric.update([label[0][:, 0]], outputs)
            # ap, cnt = calculate_ap(label, outputs)
            # AP += ap
            # AP_cnt += cnt
            # progressbar(i, num_batch-1)

        # train_map = AP / AP_cnt
        _, train_acc = metric.get()
        train_loss /= num_batch
        val_acc, val_loss = validate(finetune_net, val_data, ctx)

        logging.info('[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f, loss: %.3f | '
                     'time: %.1f | learning_rate %.6f' %
                     (epoch, train_acc, train_loss, val_acc, val_loss,
                      time.time() - tic, trainer.learning_rate))
        f_val.writelines('[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f, loss: %.3f | '
                         'time: %.1f | learning_rate %.6f\n' %
                         (epoch, train_acc, train_loss, val_acc, val_loss,
                          time.time() - tic, trainer.learning_rate))

        # Save the best model at every stage
        if val_acc > best_acc:
            # best_AP = this_AP
            best_acc = val_acc
            if not os.path.exists(os.path.join('.', 'models')):
                os.makedirs(os.path.join('.', 'models'))
            best_path = os.path.join('.', 'models', '%s_%s_%s_%s_staging.params' %
                                     (task, model_name, epoch, best_acc))
            if not os.path.exists(best_path):
                f = open(best_path, 'w')
                f.close()
            finetune_net.collect_params().save(best_path)

    logging.info('\n')
    finetune_net.collect_params().load(best_path, ctx=ctx)
    f_val.writelines('Best val acc is :[Epoch %d] Train-acc: %.3f, loss: %.3f | Best-val-acc: %.3f, '
                     'loss: %.3f | time: %.1f | learning_rate %.6f\n' %
                     (epoch, train_acc, train_loss, best_acc, val_loss,
                      time.time() - tic, trainer.learning_rate))
    return finetune_net
def main():
    opt = parse_args()

    batch_size = opt.batch_size
    classes = 10

    log_dir = os.path.join(opt.save_dir, "logs")
    model_dir = os.path.join(opt.save_dir, "params")
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Init dataloader
    jitter_param = 0.4
    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.RandomBrightness(jitter_param),
        transforms.RandomColorJitter(jitter_param),
        transforms.RandomContrast(jitter_param),
        transforms.RandomSaturation(jitter_param),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    ])
    train_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
        batch_size=batch_size, shuffle=True, last_batch='discard',
        num_workers=opt.num_workers)
    val_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
        batch_size=batch_size, shuffle=False, num_workers=opt.num_workers)

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]

    lr_decay = opt.lr_decay
    lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')] + [np.inf]

    model_name = opt.model
    if model_name.startswith('cifar_wideresnet'):
        kwargs = {'classes': classes, 'drop_rate': opt.drop_rate}
    else:
        kwargs = {'classes': classes}
    net = get_model(model_name, **kwargs)
    if opt.resume_from:
        net.load_parameters(opt.resume_from, ctx=context)
    optimizer = 'nag'

    save_period = opt.save_period
    if opt.save_dir and save_period:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_period = 0

    def test(ctx, val_loader):
        metric = mx.metric.Accuracy()
        for i, batch in enumerate(val_loader):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            outputs = [net(X) for X in data]
            metric.update(label, outputs)
        return metric.get()

    def train(train_data, val_data, epochs, ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        net.hybridize()
        net.initialize(mx.init.Xavier(), ctx=ctx)
        net.forward(mx.nd.ones((1, 3, 30, 30), ctx=ctx[0]))
        with SummaryWriter(logdir=log_dir, verbose=False) as sw:
            sw.add_graph(net)

        trainer = gluon.Trainer(net.collect_params(), optimizer, {
            'learning_rate': opt.lr, 'wd': opt.wd, 'momentum': opt.momentum})
        metric = mx.metric.Accuracy()
        train_metric = mx.metric.Accuracy()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

        iteration = 0
        lr_decay_count = 0
        best_val_score = 0
        global_step = 0

        for epoch in range(epochs):
            tic = time.time()
            train_metric.reset()
            metric.reset()
            train_loss = 0
            num_batch = len(train_data)
            alpha = 1

            if epoch == lr_decay_epoch[lr_decay_count]:
                trainer.set_learning_rate(trainer.learning_rate * lr_decay)
                lr_decay_count += 1

            tbar = tqdm(train_data)
            for i, batch in enumerate(tbar):
                data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
                label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
                with ag.record():
                    output = [net(X) for X in data]
                    loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])
                train_metric.update(label, output)
                name, acc = train_metric.get()
                iteration += 1
                global_step += len(loss)

            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            name, val_acc = test(ctx, val_data)
            if val_acc > best_val_score:
                best_val_score = val_acc
                net.save_parameters('{}/{}-{}-{:04.3f}-best.params'.format(
                    model_dir, model_name, epoch, best_val_score))

            with SummaryWriter(logdir=log_dir, verbose=False) as sw:
                sw.add_scalar(tag="TrainLoss", value=train_loss, global_step=global_step)
                sw.add_scalar(tag="TrainAcc", value=acc, global_step=global_step)
                sw.add_scalar(tag="ValAcc", value=val_acc, global_step=global_step)
                sw.add_graph(net)

            logging.info('[Epoch %d] train=%f val=%f loss=%f time: %f' %
                         (epoch, acc, val_acc, train_loss, time.time() - tic))

            if save_period and save_dir and (epoch + 1) % save_period == 0:
                net.save_parameters('{}/{}-{}.params'.format(save_dir, model_name, epoch))

        if save_period and save_dir:
            net.save_parameters('{}/{}-{}.params'.format(save_dir, model_name, epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize()
    train(train_data, val_data, opt.num_epochs, context)
lr_factor = 0.75
ctx = mx.gpu()
batch_size = 64

# data augmentation
jitter_param = 0.4
brightness = 0.1
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomFlipLeftRight(),
    transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
                                 saturation=jitter_param),
    transforms.RandomBrightness(brightness=brightness),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# data loaders
path = '../data/minc-2500'
train_path = os.path.join(path, 'train')
val_path = os.path.join(path, 'val')
test_path = os.path.join(path, 'test')
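# A hedged sketch of the data loaders these paths and transforms would feed,
# assuming an ImageFolderDataset layout under each MINC-2500 split directory;
# the num_workers value is an illustrative choice.
train_data = gluon.data.DataLoader(
    gluon.data.vision.ImageFolderDataset(train_path).transform_first(transform_train),
    batch_size=batch_size, shuffle=True, num_workers=4)
val_data = gluon.data.DataLoader(
    gluon.data.vision.ImageFolderDataset(val_path).transform_first(transform_test),
    batch_size=batch_size, shuffle=False, num_workers=4)
test_data = gluon.data.DataLoader(
    gluon.data.vision.ImageFolderDataset(test_path).transform_first(transform_test),
    batch_size=batch_size, shuffle=False, num_workers=4)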