    if grid.ndim == 3 and grid.shape[2] == 1:
        grid = grid.squeeze()
    return grid

# %%
# Fix the seed
# mx.random.seed(42)

# %%
# -- Get some data to train on
mnist_train = gluon.data.vision.datasets.MNIST(train=True)
mnist_valid = gluon.data.vision.datasets.MNIST(train=False)
transformer = transforms.Compose([transforms.ToTensor()])

# %%
# -- Create a data iterator that feeds batches
train_data = gluon.data.DataLoader(mnist_train.transform_first(transformer),
                                   batch_size=BATCH_SIZE, shuffle=True,
                                   last_batch="rollover", num_workers=N_WORKERS)
eval_data = gluon.data.DataLoader(mnist_valid.transform_first(transformer),
                                  batch_size=BATCH_SIZE, shuffle=False,
                                  last_batch="rollover", num_workers=N_WORKERS)
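# %%
# A quick sanity check of the iterator (a sketch; assumes the BATCH_SIZE and
# N_WORKERS constants used above). ToTensor yields float32 CHW images, so each
# batch is (BATCH_SIZE, 1, 28, 28) with labels of shape (BATCH_SIZE,).
for data, label in train_data:
    print(data.shape, label.shape)
    break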
    def forward(self, x):
        # Pad on all four sides, then take a random crop back to the target size.
        x = mx.image.copyMakeBorder(x, self.padding, self.padding,
                                    self.padding, self.padding)
        x, _ = mx.image.random_crop(x, (self.size, self.size))
        return x


normalize = T.Normalize(mean=[0.491, 0.482, 0.447], std=[0.247, 0.243, 0.262])
train_transform = T.Compose([
    RandomCrop(32, padding=4),
    # T.RandomResizedCrop(32),
    T.RandomFlipLeftRight(),
    T.ToTensor(),
    normalize
])
val_transform = T.Compose([T.ToTensor(), normalize])

trainset = datasets.CIFAR10('./data', train=True).transform_first(train_transform)
trainloader = gluon.data.DataLoader(trainset, batch_size=args.batch_size,
                                    shuffle=True, num_workers=2)
testset = datasets.CIFAR10('./data', train=False).transform_first(val_transform)
testloader = gluon.data.DataLoader(testset,
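# Hypothetical usage sketch of the padded RandomCrop above (assumes the class
# is a gluon Block, so instances are callable): pad a 32x32 HWC image by 4 on
# each side, then crop back to 32x32.
img = mx.nd.random.uniform(0, 255, (32, 32, 3)).astype('uint8')
crop = RandomCrop(32, padding=4)
print(crop(img).shape)  # (32, 32, 3)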
Read with GluonCV
-----------------

The prepared dataset can be loaded with the utility class
:py:class:`gluoncv.data.ImageNet` directly. Here is an example that randomly
reads 128 images each time and performs randomized resizing and cropping.
"""
from gluoncv.data import ImageNet
from mxnet.gluon.data import DataLoader
from mxnet.gluon.data.vision import transforms

train_trans = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.ToTensor()
])

# You need to specify ``root`` for ImageNet if you extracted the images into
# a different folder
train_data = DataLoader(ImageNet(train=True).transform_first(train_trans),
                        batch_size=128, shuffle=True)

#########################################################################
for x, y in train_data:
    print(x.shape, y.shape)
    break

#########################################################################
# Plot some validation images
from gluoncv.utils import viz
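#########################################################################
# The validation split is usually read with a deterministic pipeline. A
# sketch of such a loader (the 256/224 sizes are conventional assumptions,
# not part of this tutorial):
val_trans = transforms.Compose([transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.ToTensor()])
val_data = DataLoader(ImageNet(train=False).transform_first(val_trans),
                      batch_size=128)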
def run_mnist(
        hook=None,
        set_modes=False,
        num_steps_train=None,
        num_steps_eval=None,
        epochs=2,
        save_interval=None,
        save_path="./saveParams",
):
    batch_size = 4
    normalize_mean = 0.13
    mnist_train = datasets.FashionMNIST(train=True)

    X, y = mnist_train[0]
    print("X shape: ", X.shape, "X dtype", X.dtype, "y:", y)

    text_labels = [
        "t-shirt", "trouser", "pullover", "dress", "coat",
        "sandal", "shirt", "sneaker", "bag", "ankle boot",
    ]
    transformer = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(normalize_mean, 0.31)])

    mnist_train = mnist_train.transform_first(transformer)
    mnist_valid = gluon.data.vision.FashionMNIST(train=False)
    train_data = gluon.data.DataLoader(mnist_train, batch_size=batch_size,
                                       shuffle=True, num_workers=4)
    valid_data = gluon.data.DataLoader(mnist_valid.transform_first(transformer),
                                       batch_size=batch_size, num_workers=4)

    # Create the model in Gluon
    net = nn.HybridSequential()
    net.add(
        nn.Conv2D(channels=6, kernel_size=5, activation="relu"),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Conv2D(channels=16, kernel_size=3, activation="relu"),
        nn.MaxPool2D(pool_size=2, strides=2),
        nn.Flatten(),
        nn.Dense(120, activation="relu"),
        nn.Dense(84, activation="relu"),
        nn.Dense(10),
    )
    net.initialize(init=init.Xavier(), ctx=mx.cpu())

    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), "sgd", {"learning_rate": 0.1})

    if hook is not None:
        # Register the forward hooks on the network and the loss block.
        hook.register_hook(net)
        hook.register_hook(softmax_cross_entropy)

    # Start the training.
    for epoch in range(epochs):
        train_loss, train_acc, valid_acc = 0.0, 0.0, 0.0
        tic = time.time()
        if set_modes:
            hook.set_mode(modes.TRAIN)
        i = 0
        for data, label in train_data:
            data = data.as_in_context(mx.cpu(0))
            # forward + backward
            with autograd.record():
                output = net(data)
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            # update parameters
            trainer.step(batch_size)
            # calculate training metrics
            train_loss += loss.mean().asscalar()
            train_acc += acc(output, label)
            i += 1
            if num_steps_train is not None and i >= num_steps_train:
                break
        # calculate validation accuracy
        if set_modes:
            hook.set_mode(modes.EVAL)
        i = 0
        for data, label in valid_data:
            data = data.as_in_context(mx.cpu(0))
            val_output = net(data)
            valid_acc += acc(val_output, label)
            loss = softmax_cross_entropy(val_output, label)
            i += 1
            if num_steps_eval is not None and i >= num_steps_eval:
                break
        print("Epoch %d: loss %.3f, train acc %.3f, test acc %.3f, in %.1f sec" %
              (epoch, train_loss / len(train_data), train_acc / len(train_data),
               valid_acc / len(valid_data), time.time() - tic))
        if save_interval is not None and (epoch % save_interval) == 0:
            net.save_parameters("{0}/params_{1}.params".format(save_path, epoch))
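# A minimal invocation sketch (no hook, so nothing is registered or recorded;
# the step limits below are illustrative, not from the source):
if __name__ == "__main__":
    run_mnist(hook=None, epochs=1, num_steps_train=10, num_steps_eval=10)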
def train_cifar10(config):
    args = config.pop("args")
    vars(args).update(config)
    np.random.seed(args.seed)
    random.seed(args.seed)
    mx.random.seed(args.seed)

    # Set hyper-params
    batch_size = args.batch_size * max(args.num_gpus, 1)
    ctx = [mx.gpu(i) for i in range(args.num_gpus)] if args.num_gpus > 0 else [mx.cpu()]

    # Define DataLoader
    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010]),
    ])
    train_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
        batch_size=batch_size, shuffle=True, last_batch="discard",
        num_workers=args.num_workers)
    test_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
        batch_size=batch_size, shuffle=False, num_workers=args.num_workers)

    # Load the model architecture and initialize the net with a pretrained model
    finetune_net = get_model(args.model, pretrained=True)
    with finetune_net.name_scope():
        finetune_net.fc = nn.Dense(args.classes)
    finetune_net.fc.initialize(init.Xavier(), ctx=ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # Define trainer
    trainer = gluon.Trainer(finetune_net.collect_params(), "sgd", {
        "learning_rate": args.lr, "momentum": args.momentum, "wd": args.wd})
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    metric = mx.metric.Accuracy()

    def train(epoch):
        for i, batch in enumerate(train_data):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                              batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx,
                                               batch_axis=0, even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            for ls in loss:
                ls.backward()
            trainer.step(batch_size)
        mx.nd.waitall()

    def test():
        test_loss = 0
        for i, batch in enumerate(test_data):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                              batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx,
                                               batch_axis=0, even_split=False)
            outputs = [finetune_net(X) for X in data]
            loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            test_loss += sum(ls.mean().asscalar() for ls in loss) / len(loss)
            metric.update(label, outputs)
        _, test_acc = metric.get()
        test_loss /= len(test_data)
        return test_loss, test_acc

    for epoch in range(1, args.epochs + 1):
        train(epoch)
        test_loss, test_acc = test()
        tune.report(mean_loss=test_loss, mean_accuracy=test_acc)
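# A hedged launch sketch for the trainable above (assumes Ray Tune's function
# API, matching the tune.report call; the `args` object and the search space
# are illustrative, not from the source):
if __name__ == "__main__":
    from ray import tune
    analysis = tune.run(
        train_cifar10,
        config={"args": args, "lr": tune.grid_search([0.01, 0.1])},
        resources_per_trial={"gpu": args.num_gpus},
    )
    print(analysis.get_best_config(metric="mean_accuracy", mode="max"))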
def main():
    opt = parse_args()

    batch_size = opt.batch_size
    classes = 10

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers

    lr_decay = opt.lr_decay
    lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')] + [np.inf]

    model_name = opt.model
    if model_name.startswith('cifar_wideresnet'):
        kwargs = {'classes': classes, 'drop_rate': opt.drop_rate}
    else:
        kwargs = {'classes': classes}
    net = get_model(model_name, **kwargs)
    if opt.resume_from:
        net.load_parameters(opt.resume_from, ctx=context)
    optimizer = 'nag'

    save_period = opt.save_period
    if opt.save_dir and save_period:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_period = 0
    plot_path = opt.save_plot_dir

    logging.basicConfig(level=logging.INFO)
    logging.info(opt)

    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    ])

    def test(ctx, val_data):
        metric = mx.metric.Accuracy()
        for i, batch in enumerate(val_data):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            outputs = [net(X) for X in data]
            metric.update(label, outputs)
        return metric.get()

    def train(epochs, ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        net.initialize(mx.init.Xavier(), ctx=ctx)

        train_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True, last_batch='discard',
            num_workers=num_workers)
        val_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=num_workers)

        trainer = gluon.Trainer(net.collect_params(), optimizer,
                                {'learning_rate': opt.lr, 'wd': opt.wd,
                                 'momentum': opt.momentum})
        metric = mx.metric.Accuracy()
        train_metric = mx.metric.Accuracy()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
        train_history = TrainingHistory(['training-error', 'validation-error'])

        iteration = 0
        lr_decay_count = 0
        best_val_score = 0

        for epoch in range(epochs):
            tic = time.time()
            train_metric.reset()
            metric.reset()
            train_loss = 0
            num_batch = len(train_data)
            alpha = 1

            if epoch == lr_decay_epoch[lr_decay_count]:
                trainer.set_learning_rate(trainer.learning_rate * lr_decay)
                lr_decay_count += 1

            for i, batch in enumerate(train_data):
                data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
                label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)

                with ag.record():
                    output = [net(X) for X in data]
                    loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])

                train_metric.update(label, output)
                name, acc = train_metric.get()
                iteration += 1

            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            name, val_acc = test(ctx, val_data)
            train_history.update([1 - acc, 1 - val_acc])
            train_history.plot(save_path='%s/%s_history.png' % (plot_path, model_name))

            if val_acc > best_val_score:
                best_val_score = val_acc
                net.save_parameters('%s/%.4f-cifar-%s-%d-best.params' %
                                    (save_dir, best_val_score, model_name, epoch))

            logging.info('[Epoch %d] train=%f val=%f loss=%f time: %f' %
                         (epoch, acc, val_acc, train_loss, time.time() - tic))

            if save_period and save_dir and (epoch + 1) % save_period == 0:
                net.save_parameters('%s/cifar10-%s-%d.params' %
                                    (save_dir, model_name, epoch))

        if save_period and save_dir:
            net.save_parameters('%s/cifar10-%s-%d.params' %
                                (save_dir, model_name, epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize()
    train(opt.num_epochs, context)
net = get_model(model_name, **kwargs)
net.cast(opt.dtype)
if opt.params_file:
    net.load_params(opt.params_file, ctx=ctx)
net.hybridize()

acc_top1 = mx.metric.Accuracy()
acc_top5 = mx.metric.TopKAccuracy(5)

normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
transform_test = transforms.Compose([
    transforms.Resize(256, keep_ratio=True),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize
])


def test(ctx, val_data, mode='image'):
    acc_top1.reset()
    acc_top5.reset()
    if not opt.rec_dir:
        num_batch = len(val_data)
    for i, batch in enumerate(val_data):
        if mode == 'image':
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        txts.append(txt)
    plt.savefig(output_path)


def inf_train_gen(loader):
    """
    Yield batches from the loader indefinitely, for iteration-based training.
    :param loader: Dataloader
    :return: batch of data
    """
    while True:
        for batch in loader:
            yield batch


transform_test = transforms.Compose([transforms.ToTensor()])
_transform_train = transforms.Compose([
    transforms.RandomBrightness(0.3),
    transforms.RandomContrast(0.3),
    transforms.RandomSaturation(0.3),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor()
])


def transform_train(data, label):
    im = _transform_train(data)
    return im, label
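# Illustrative use of inf_train_gen: draw a fixed number of iterations instead
# of looping over epochs (the loader and the step count are assumptions):
def demo_iteration_loop(train_loader, num_iters=1000):
    gen = inf_train_gen(train_loader)
    for it in range(num_iters):
        data, label = next(gen)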
        #tmp = nd.zeros(shape=(self.n_frame, self.crop_size, self.crop_size, 3), dtype='float32')
        if current_length < self.n_frame:
            # construct the last frame and concat
            extra_data = nd.tile(nd_image_list[-1],
                                 reps=(self.n_frame - current_length, 1, 1, 1))
            extra_data = extra_data.transpose((1, 0, 2, 3))
            cthw_data = nd.concat(cthw_data, extra_data, dim=1)
        # begin to construct the label
        label_nd = np.zeros(shape=(self.max_label), dtype=np.float32)
        for tag_index in labels:
            label_nd[tag_index - 1] = 1
        return cthw_data, label_nd


train_transform = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


def get_simple_meitu_dataloader(datadir, batch_size=4, n_frame=32, crop_size=112,
                                scale_h=128, scale_w=171, num_workers=6):
    """Construct the dataset and then set up the dataset loader."""
    train_dataset = SimpleMeitu(datadir, n_frame, crop_size,
def main():
    opt = parse_args()

    filehandler = logging.FileHandler(opt.logging_file, mode='a+')
    # streamhandler = logging.StreamHandler()
    logger = logging.getLogger('ImageNet')
    logger.setLevel(level=logging.DEBUG)
    logger.addHandler(filehandler)
    # logger.addHandler(streamhandler)
    logger.info(opt)

    if opt.amp:
        amp.init()

    batch_size = opt.batch_size
    classes = 1000
    num_training_samples = 1281167
    num_validating_samples = 50000

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    accumulate = opt.accumulate

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // batch_size

    lr_scheduler = LRSequential([
        LRScheduler('linear', base_lr=0, target_lr=opt.lr,
                    nepochs=opt.warmup_epochs, iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode, base_lr=opt.lr, target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay, power=2)
    ])

    model_name = opt.model

    kwargs = {'ctx': context, 'pretrained': opt.use_pretrained}
    if opt.use_gn:
        kwargs['norm_layer'] = gcv.nn.GroupNorm
    if model_name.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm
    elif model_name.startswith('resnext'):
        kwargs['use_se'] = opt.use_se
    if opt.last_gamma:
        kwargs['last_gamma'] = True

    optimizer = 'sgd'
    optimizer_params = {'wd': opt.wd, 'momentum': opt.momentum,
                        'lr_scheduler': lr_scheduler,
                        'begin_num_update': num_batches * opt.resume_epoch}
    # if opt.dtype != 'float32':
    #     optimizer_params['multi_precision'] = True

    # net = get_model(model_name, **kwargs)
    if opt.model_backend == 'gluoncv':
        net = glcv_get_model(model_name, **kwargs)
    elif opt.model_backend == 'gluoncv2':
        net = glcv2_get_model(model_name, **kwargs)
    else:
        raise ValueError(f'Unknown backend: {opt.model_backend}')
    # net.cast(opt.dtype)
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context, cast_dtype=True)

    # teacher model for distillation training
    if opt.teacher is not None and opt.hard_weight < 1.0:
        teacher_name = opt.teacher
        if opt.teacher_backend == 'gluoncv':
            teacher = glcv_get_model(teacher_name, **kwargs)
        elif opt.teacher_backend == 'gluoncv2':
            teacher = glcv2_get_model(teacher_name, **kwargs)
        else:
            raise ValueError(f'Unknown backend: {opt.teacher_backend}')
        # teacher = glcv2_get_model(teacher_name, pretrained=True, ctx=context)
        # teacher.cast(opt.dtype)
        teacher.collect_params().setattr('grad_req', 'null')
        distillation = True
    else:
        distillation = False

    # Two functions for reading data from a record file or raw images
    def get_data_rec(rec_train, rec_val):
        rec_train = os.path.expanduser(rec_train)
        rec_val = os.path.expanduser(rec_val)
        # mean_rgb = [123.68, 116.779, 103.939]
        # std_rgb = [58.393, 57.12, 57.375]
        train_dataset = ImageRecordDataset(filename=rec_train, flag=1)
        val_dataset = ImageRecordDataset(filename=rec_val, flag=1)
        return train_dataset, val_dataset

    def get_data_loader(data_dir):
        train_dataset = ImageNet(data_dir, train=True)
        val_dataset = ImageNet(data_dir, train=False)
        return train_dataset, val_dataset

    def batch_fn(batch, ctx):
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        return data, label

    if opt.use_rec:
        train_dataset, val_dataset = get_data_rec(opt.rec_train, opt.rec_val)
    else:
        train_dataset, val_dataset = get_data_loader(opt.data_dir)

    normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    jitter_param = 0.4
    lighting_param = 0.1

    if not opt.multi_scale:
        train_dataset = train_dataset.transform_first(transforms.Compose([
            transforms.RandomResizedCrop(opt.input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param,
                                         contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            normalize
        ]))
        train_data = gluon.data.DataLoader(train_dataset, batch_size=batch_size,
                                           shuffle=True, pin_memory=True,
                                           last_batch='rollover',
                                           num_workers=num_workers)
    else:
        train_data = RandomTransformDataLoader(
            [Transform(transforms.Compose([
                # transforms.RandomResizedCrop(opt.input_size),
                transforms.RandomResizedCrop(x * 32),
                transforms.RandomFlipLeftRight(),
                transforms.RandomColorJitter(brightness=jitter_param,
                                             contrast=jitter_param,
                                             saturation=jitter_param),
                transforms.RandomLighting(lighting_param),
                transforms.ToTensor(),
                normalize
            ])) for x in range(10, 20)],
            train_dataset, interval=10 * opt.accumulate, batch_size=batch_size,
            shuffle=False, pin_memory=True, last_batch='rollover',
            num_workers=num_workers)

    val_dataset = val_dataset.transform_first(transforms.Compose([
        transforms.Resize(opt.input_size, keep_ratio=True),
        transforms.CenterCrop(opt.input_size),
        transforms.ToTensor(),
        normalize
    ]))
    val_data = gluon.data.DataLoader(val_dataset, batch_size=batch_size,
                                     shuffle=False, pin_memory=True,
                                     last_batch='keep', num_workers=num_workers)

    if opt.mixup:
        train_metric = mx.metric.RMSE()
    else:
        train_metric = mx.metric.Accuracy()
    train_loss_metric = mx.metric.Loss()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        if opt.wandb:
            save_dir = wandb.run.dir
        else:
            save_dir = opt.save_dir
            makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    def mixup_transform(label, classes, lam=1, eta=0.0):
        if isinstance(label, nd.NDArray):
            label = [label]
        res = []
        for l in label:
            y1 = l.one_hot(classes, on_value=1 - eta + eta / classes,
                           off_value=eta / classes)
            y2 = l[::-1].one_hot(classes, on_value=1 - eta + eta / classes,
                                 off_value=eta / classes)
            res.append(lam * y1 + (1 - lam) * y2)
        return res

    def smooth(label, classes, eta=0.1):
        if isinstance(label, nd.NDArray):
            label = [label]
        smoothed = []
        for l in label:
            res = l.one_hot(classes, on_value=1 - eta + eta / classes,
                            off_value=eta / classes)
            smoothed.append(res)
        return smoothed

    def test(ctx, val_data):
        acc_top1.reset()
        acc_top5.reset()
        for i, batch in tqdm.tqdm(enumerate(val_data), desc='Validating',
                                  total=num_validating_samples // batch_size):
            data, label = batch_fn(batch, ctx)
            # outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
            outputs = [net(X) for X in data]
            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)
        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return 1 - top1, 1 - top5

    def train(ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        if opt.resume_params == '':
            import warnings
            with warnings.catch_warnings(record=True) as w:
                net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

        if opt.no_wd:
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        if accumulate > 1:
            logger.info(f'accumulate: {accumulate}, using "add" grad_req')
            import warnings
            with warnings.catch_warnings(record=True) as w:
                net.collect_params().setattr('grad_req', 'add')

        trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params,
                                update_on_kvstore=False if opt.amp else None)
        if opt.amp:
            amp.init_trainer(trainer)
        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        if opt.label_smoothing or opt.mixup:
            sparse_label_loss = False
        else:
            sparse_label_loss = True
        if distillation:
            L = gcv.loss.DistillationSoftmaxCrossEntropyLoss(
                temperature=opt.temperature, hard_weight=opt.hard_weight,
                sparse_label=sparse_label_loss)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=sparse_label_loss)

        best_val_score = 1

        err_top1_val, err_top5_val = test(ctx, val_data)
        logger.info('initial validation: err-top1=%f err-top5=%f' %
                    (err_top1_val, err_top5_val))

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            train_metric.reset()
            train_loss_metric.reset()
            btic = time.time()

            pbar = tqdm.tqdm(total=num_batches, desc=f'Training [{epoch}]',
                             leave=True)
            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam * X + (1 - lam) * X[::-1] for X in data]
                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)
                elif opt.label_smoothing:
                    hard_label = label
                    label = smooth(label, classes)

                if distillation:
                    # teacher_prob = [nd.softmax(teacher(X.astype(opt.dtype, copy=False)) / opt.temperature)
                    #                 for X in data]
                    with ag.predict_mode():
                        teacher_prob = [
                            nd.softmax(
                                teacher(nd.transpose(
                                    nd.image.resize(nd.transpose(X, (0, 2, 3, 1)),
                                                    size=opt.teacher_imgsize),
                                    (0, 3, 1, 2))) / opt.temperature)
                            for X in data
                        ]

                with ag.record():
                    # outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                    outputs = [net(X) for X in data]
                    if distillation:
                        loss = [L(yhat, y, p) for yhat, y, p in
                                zip(outputs, label, teacher_prob)]
                    else:
                        # loss = [L(yhat, y.astype(opt.dtype, copy=False)) for yhat, y in zip(outputs, label)]
                        loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
                if opt.amp:
                    with amp.scale_loss(loss, trainer) as scaled_loss:
                        ag.backward(scaled_loss)
                else:
                    ag.backward(loss)

                if accumulate > 1:
                    if (i + 1) % accumulate == 0:
                        trainer.step(batch_size * accumulate)
                        net.collect_params().zero_grad()
                else:
                    trainer.step(batch_size)

                train_loss_metric.update(0, loss)
                if opt.mixup:
                    output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False))
                                      for out in outputs]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)

                _, loss_score = train_loss_metric.get()
                train_metric_name, train_metric_score = train_metric.get()
                samplers_per_sec = batch_size / (time.time() - btic)
                postfix = f'{samplers_per_sec:.1f} imgs/sec, ' \
                          f'loss: {loss_score:.4f}, ' \
                          f'acc: {train_metric_score * 100:.2f}, ' \
                          f'lr: {trainer.learning_rate:.4e}'
                if opt.multi_scale:
                    postfix += f', size: {data[0].shape[-1]}'
                pbar.set_postfix_str(postfix)
                pbar.update()
                btic = time.time()

                if opt.log_interval and not (i + 1) % opt.log_interval:
                    step = epoch * num_batches + i
                    wandb.log({'samplers_per_sec': samplers_per_sec,
                               train_metric_name: train_metric_score,
                               'lr': trainer.learning_rate,
                               'loss': loss_score}, step=step)
                    logger.info('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f' %
                                (epoch, i, samplers_per_sec, train_metric_name,
                                 train_metric_score, trainer.learning_rate))

            pbar.close()
            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i / (time.time() - tic))

            err_top1_val, err_top5_val = test(ctx, val_data)
            wandb.log({'err1': err_top1_val, 'err5': err_top5_val},
                      step=epoch * num_batches)

            logger.info('[Epoch %d] training: %s=%f' %
                        (epoch, train_metric_name, train_metric_score))
            logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f' %
                        (epoch, throughput, time.time() - tic))
            logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f' %
                        (epoch, err_top1_val, err_top5_val))

            if err_top1_val < best_val_score:
                best_val_score = err_top1_val
                net.save_parameters('%s/%.4f-imagenet-%s-%d-best.params' %
                                    (save_dir, best_val_score, model_name, epoch))
                trainer.save_states('%s/%.4f-imagenet-%s-%d-best.states' %
                                    (save_dir, best_val_score, model_name, epoch))

            if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
                net.save_parameters('%s/imagenet-%s-%d.params' %
                                    (save_dir, model_name, epoch))
                trainer.save_states('%s/imagenet-%s-%d.states' %
                                    (save_dir, model_name, epoch))

        if save_frequency and save_dir:
            net.save_parameters('%s/imagenet-%s-%d.params' %
                                (save_dir, model_name, opt.num_epochs - 1))
            trainer.save_states('%s/imagenet-%s-%d.states' %
                                (save_dir, model_name, opt.num_epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=not opt.multi_scale)
        if distillation:
            teacher.hybridize(static_alloc=True, static_shape=not opt.multi_scale)
    train(context)
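# A standalone check of the label-smoothing construction used inside main()
# above (eta=0.1, K=5 classes): the true class gets 1 - eta + eta/K and every
# other class gets eta/K.
def _smoothing_demo():
    from mxnet import nd
    eta, K = 0.1, 5
    lbl = nd.array([2])
    print(lbl.one_hot(K, on_value=1 - eta + eta / K, off_value=eta / K))
    # [[0.02 0.02 0.92 0.02 0.02]]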
from mxnet.image import imread

logging.basicConfig(level=logging.WARNING, format='%(asctime)s %(message)s')

import requests
import json
import binascii
import numpy as np
from pymongo import MongoClient
from requests import ReadTimeout
from pprint import pprint

# image transform
normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
test_transform = T.Compose(
    [T.Resize(256), T.CenterCrop(224), T.ToTensor(), normalize])


# define the MongoDB connection
def get_db():
    mongdb = {}
    mongdb['host'] = 'dds-bp10da4305cf39f41.mongodb.rds.aliyuncs.com'
    mongdb['port'] = 3717
    client = MongoClient(host=mongdb['host'], port=mongdb['port'])
    dev = client.get_database('dev')
    dev.authenticate(name='nnsearch', password='******')
    return dev


@asyncio.coroutine
def download(url, session, semaphore, chunk_size=1 << 15):
def save_and_load_sequential_example():
    # Use a GPU if one exists, else use the CPU.
    ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()

    # MNIST images are 28x28. Total pixels in the input layer is 28x28 = 784.
    #num_inputs = 784
    # Classify the images into one of the 10 digits.
    #num_outputs = 10
    # 64 images in a batch.
    batch_size = 64

    # Load the training data.
    train_data = gluon.data.DataLoader(
        gluon.data.vision.MNIST(train=True).transform_first(transforms.ToTensor()),
        batch_size, shuffle=True)
    valid_data = gluon.data.DataLoader(
        gluon.data.vision.MNIST(train=False).transform_first(transforms.ToTensor()),
        batch_size, shuffle=True)

    # Define a model.
    net = build_lenet(gluon.nn.Sequential())
    # Initialize the parameters with the Xavier initializer.
    net.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
    # Use cross-entropy loss.
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    # Use the Adam optimizer.
    trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': .001})

    train_model(net, trainer, softmax_cross_entropy, train_data, valid_data,
                num_epochs=10)

    #--------------------
    if True:
        model_param_filepath = './lenet.params'

        # Save model parameters to file.
        net.save_parameters(model_param_filepath)

        # Define a model.
        new_net = build_lenet(gluon.nn.Sequential())
        # Load model parameters from file.
        new_net.load_parameters(model_param_filepath, ctx=ctx)
    else:
        # NOTE [info] >> Sequential models may not be serialized as JSON files.
        model_filepath = './lenet.json'
        model_param_filepath = './lenet.params'

        # Save the model architecture to file.
        sym_json = net(mx.sym.var('data')).tojson()
        sym_json = json.loads(sym_json)
        with open(model_filepath, 'w') as fd:
            json.dump(sym_json, fd, indent='\t')
        # Save model parameters to file.
        net.save_parameters(model_param_filepath)

        # Load the model architecture from file.
        with open(model_filepath, 'r') as fd:
            sym_json = json.load(fd)
        sym_json = json.dumps(sym_json)
        new_net = gluon.nn.SymbolBlock(outputs=mx.sym.load_json(sym_json),
                                       inputs=mx.sym.var('data'))
        # Load model parameters from file.
        new_net.load_parameters(model_param_filepath, ctx=ctx)

    verify_loaded_model(new_net, ctx=ctx)
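# For hybrid blocks there is a simpler serialization route than the SymbolBlock
# detour above: a sketch using export(), assuming build_lenet also accepts a
# HybridSequential container.
def export_hybrid_lenet_example():
    ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()
    hnet = build_lenet(gluon.nn.HybridSequential())
    hnet.collect_params().initialize(mx.init.Xavier(), ctx=ctx)
    hnet.hybridize()
    hnet(mx.nd.ones((1, 1, 28, 28), ctx=ctx))  # one forward pass builds the graph
    hnet.export('./lenet', epoch=0)  # writes lenet-symbol.json and lenet-0000.params
    # The pair can then be reloaded with gluon.nn.SymbolBlock.imports(
    #     './lenet-symbol.json', ['data'], './lenet-0000.params', ctx=ctx)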
def train_neural_network_example():
    mnist_train = datasets.FashionMNIST(train=True)
    X, y = mnist_train[0]
    print('X shape:', X.shape, 'X dtype:', X.dtype, 'y:', y)

    text_labels = [
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot'
    ]
    X, y = mnist_train[0:6]

    # Plot images.
    _, figs = plt.subplots(1, X.shape[0], figsize=(15, 15))
    for f, x, yi in zip(figs, X, y):
        # 3D -> 2D by removing the last channel dim.
        f.imshow(x.reshape((28, 28)).asnumpy())
        ax = f.axes
        ax.set_title(text_labels[int(yi)])
        ax.title.set_fontsize(20)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
    plt.show()

    transformer = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(0.13, 0.31)])
    mnist_train = mnist_train.transform_first(transformer)

    batch_size = 256
    train_data = gluon.data.DataLoader(mnist_train, batch_size=batch_size,
                                       shuffle=True, num_workers=4)
    for data, label in train_data:
        print(data.shape, label.shape)
        break

    mnist_valid = gluon.data.vision.FashionMNIST(train=False)
    valid_data = gluon.data.DataLoader(mnist_valid.transform_first(transformer),
                                       batch_size=batch_size, num_workers=4)

    # Define a model.
    net = build_network(nn.Sequential())
    net.initialize(init=init.Xavier())
    #net.collect_params().initialize(init.Xavier(), ctx=ctx)

    # Define the loss function and optimization method for training.
    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})
    #trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': .001})

    train_model(net, trainer, softmax_cross_entropy, train_data, valid_data,
                num_epochs=10)

    # Save the model parameters.
    net.save_parameters('./net.params')
    def __init__(self, args):
        self.args = args
        # image transform
        input_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
        ])
        # dataset and dataloader
        trainset = get_mxnet_dataset(args.dataset, split='train',
                                     transform=input_transform)
        valset = get_mxnet_dataset(args.dataset, split='val',
                                   transform=input_transform)
        self.train_data = gluon.data.DataLoader(trainset, args.batch_size,
                                                shuffle=True,
                                                last_batch='rollover',
                                                num_workers=args.workers)
        self.eval_data = gluon.data.DataLoader(valset, args.test_batch_size,
                                               last_batch='keep',
                                               num_workers=args.workers)
        # create the network
        model = get_segmentation_model(model=args.model, dataset=args.dataset,
                                       backbone=args.backbone,
                                       norm_layer=args.norm_layer, aux=args.aux)
        print(model)
        self.net = DataParallelModel(model, args.ctx, args.syncbn)
        self.evaluator = DataParallelModel(SegEvalModel(model), args.ctx)

        # resume from a checkpoint if needed
        if args.resume is not None:
            if os.path.isfile(args.resume):
                model.load_params(args.resume, ctx=args.ctx)
            else:
                raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))

        # create the criterion
        criterion = SoftmaxCrossEntropyLossWithAux(args.aux)
        self.criterion = DataParallelCriterion(criterion, args.ctx, args.syncbn)

        # optimizer and lr scheduling
        self.lr_scheduler = LRScheduler(mode='poly', baselr=args.lr,
                                        niters=len(self.train_data),
                                        nepochs=args.epochs)
        kv = mx.kv.create(args.kvstore)
        self.optimizer = gluon.Trainer(self.net.module.collect_params(), 'sgd',
                                       {'lr_scheduler': self.lr_scheduler,
                                        'wd': args.weight_decay,
                                        'momentum': args.momentum,
                                        'multi_precision': True},
                                       kvstore=kv)

        time_str = time.strftime("%Y-%m-%d___%H-%M-%S", time.localtime())
        net_name = args.model
        dataset_name = args.dataset
        note = args.backbone
        log_dir = os.path.join(self.args.logdir, net_name, dataset_name, note,
                               time_str)
        os.makedirs(log_dir, exist_ok=True)
        self.writer = SummaryWriter(log_dir=log_dir)
        # Serialize the Namespace via vars(); default=str covers values such as
        # mx.Context objects that json cannot encode directly.
        config_str = json.dumps(vars(self.args), indent=2, sort_keys=True,
                                default=str).replace('\n', '\n\n').replace(' ', '\t')
        self.writer.add_text(tag='config', text_string=config_str)
    if not os.path.exists(save_dir):
        makedirs(save_dir)
    name = opt.name
    plot_path = opt.save_plot_dir

    logging.basicConfig(level=logging.INFO)
    logging.info(opt)

    trans, aug_trans = list(), list()
    if opt.data_aug:
        aug_trans = [
            gcv_transforms.RandomCrop(32, pad=4),
            transforms.RandomFlipLeftRight()
        ]
    trans.append(transforms.ToTensor())
    trans.append(transforms.Normalize([0.4914, 0.4822, 0.4465],
                                      [0.2023, 0.1994, 0.2010]))
    transform_train = transforms.Compose(aug_trans + trans)
    transform_test = transforms.Compose(trans)

    data_sampler = eval('DataSampler%d' % opt.data_sampler)()
    num_data_samples = opt.data_k
    use_som = opt.use_som
    use_pillars = opt.plc_loss
    pillar_sampler = eval('PillarSampler%d' % opt.pillar_sampler)()
    num_pillar_samples = opt.pillar_k
    w1 = opt.w1
    w2 = opt.w2
    cumulative = opt.cumulative
def train(net, ctx):
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    if not opt.resume_from:
        net.initialize(mx.init.Xavier(), ctx=ctx)
        if opt.dataset == 'NC_CIFAR100':
            n = mx.nd.zeros(shape=(1, 3, 32, 32), ctx=ctx[0])  # init the CNN
        else:
            raise KeyError('dataset keyerror')
        for m in range(9):
            net(n, m)

    def makeSchedule(start_lr, base_lr, length, step, factor):
        schedule = mx.lr_scheduler.MultiFactorScheduler(step=step, factor=factor)
        schedule.base_lr = base_lr
        schedule = LinearWarmUp(schedule, start_lr=start_lr, length=length)
        return schedule

    # ==========================================================================
    sesses = list(np.arange(opt.sess_num))
    epochs = [opt.epoch] * opt.sess_num
    lrs = [opt.base_lrs] + [opt.lrs] * (opt.sess_num - 1)
    lr_decay = opt.lr_decay
    base_decay_epoch = [int(i) for i in opt.base_decay_epoch.split(',')] + [np.inf]
    lr_decay_epoch = [base_decay_epoch] + \
                     [[opt.inc_decay_epoch, np.inf]] * (opt.sess_num - 1)

    AL_weight = opt.AL_weight
    min_weight = opt.min_weight
    oce_weight = opt.oce_weight
    pdl_weight = opt.pdl_weight
    max_weight = opt.max_weight
    temperature = opt.temperature

    use_AL = opt.use_AL            # anchor loss
    use_ng_min = opt.use_ng_min    # Neural Gas min loss
    use_ng_max = opt.use_ng_max    # Neural Gas max loss
    ng_update = opt.ng_update      # Neural Gas update node
    use_oce = opt.use_oce          # old-samples cross-entropy loss
    use_pdl = opt.use_pdl          # probability distillation loss
    use_nme = opt.use_nme          # similarity loss
    use_warmUp = opt.use_warmUp
    use_ng = opt.use_ng            # Neural Gas
    fix_conv = opt.fix_conv        # fix the CNN to train novel classes
    fix_epoch = opt.fix_epoch
    c_way = opt.c_way
    k_shot = opt.k_shot
    base_acc = opt.base_acc        # base model acc
    select_best_method = opt.select_best  # select from _best, _best2, _best3
    init_class = 60
    anchor_num = 400
    # ==========================================================================

    acc_dict = {}
    all_best_e = []

    if model_name[-7:] != 'maxhead':
        net.fc3.initialize(mx.init.Normal(sigma=0.001), ctx=ctx, force_reinit=True)
        net.fc4.initialize(mx.init.Normal(sigma=0.001), ctx=ctx, force_reinit=True)
        net.fc5.initialize(mx.init.Normal(sigma=0.001), ctx=ctx, force_reinit=True)
        net.fc6.initialize(mx.init.Normal(sigma=0.001), ctx=ctx, force_reinit=True)
        net.fc7.initialize(mx.init.Normal(sigma=0.001), ctx=ctx, force_reinit=True)
        net.fc8.initialize(mx.init.Normal(sigma=0.001), ctx=ctx, force_reinit=True)
        net.fc9.initialize(mx.init.Normal(sigma=0.001), ctx=ctx, force_reinit=True)
        net.fc10.initialize(mx.init.Normal(sigma=0.001), ctx=ctx, force_reinit=True)

    for sess in sesses:
        logger.info('session : %d' % sess)
        schedule = makeSchedule(start_lr=0, base_lr=lrs[sess], length=5,
                                step=lr_decay_epoch[sess], factor=lr_decay)

        # prepare the first anchor batch
        if sess == 0 and opt.resume_from:
            acc_dict[str(sess)] = list()
            acc_dict[str(sess)].append([base_acc, 0])
            all_best_e.append(0)
            continue

        # quick cnn fully unfixed: do not use data augmentation
        if sess == 1 and model_name == 'quick_cnn' and use_AL:
            transform_train = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])
            ])
            transform_test = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])
            ])
            anchor_trans = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])
            ])
        else:
            transform_train = transforms.Compose([
                gcv_transforms.RandomCrop(32, pad=4),
                transforms.RandomFlipLeftRight(),
                transforms.ToTensor(),
                transforms.Normalize([0.5071, 0.4866, 0.4409],
                                     [0.2009, 0.1984, 0.2023])
            ])
            transform_test = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([0.5071, 0.4866, 0.4409],
                                     [0.2009, 0.1984, 0.2023])
            ])
            anchor_trans = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([0.5071, 0.4866, 0.4409],
                                     [0.2009, 0.1984, 0.2023])
            ])

        # ng_init and ng_update
        if use_AL or use_nme or use_pdl or use_oce:
            if sess != 0:
                if ng_update == True:
                    if sess == 1:
                        update_anchor1, bmu, variances = \
                            prepare_anchor(DATASET, logger, anchor_trans,
                                           num_workers, feature_size, net, ctx,
                                           use_ng, init_class)
                        update_anchor_data = DataLoader(update_anchor1,
                                                        anchor_trans,
                                                        update_anchor1.__len__(),
                                                        num_workers,
                                                        shuffle=False)
                        if opt.ng_var:
                            idx_1 = np.where(variances.asnumpy() > 0.5)
                            idx_2 = np.where(variances.asnumpy() < 0.5)
                            variances[idx_1] = 0.9
                            variances[idx_2] = 1
                    else:
                        base_class = init_class + (sess - 1) * 5
                        new_class = list(init_class + (sess - 1) * 5 + (np.arange(5)))
                        new_set = DATASET(train=True, fine_label=True,
                                          fix_class=new_class,
                                          base_class=base_class, logger=logger)
                        update_anchor2 = merge_datasets(update_anchor1, new_set)
                        update_anchor_data = DataLoader(update_anchor2,
                                                        anchor_trans,
                                                        update_anchor2.__len__(),
                                                        num_workers,
                                                        shuffle=False)
                elif sess == 1:
                    update_anchor, bmu, variances = \
                        prepare_anchor(DATASET, logger, anchor_trans, num_workers,
                                       feature_size, net, ctx, use_ng, init_class)
                    update_anchor_data = DataLoader(update_anchor, anchor_trans,
                                                    update_anchor.__len__(),
                                                    num_workers, shuffle=False)
                    if opt.ng_var:
                        idx_1 = np.where(variances.asnumpy() > 0.5)
                        idx_2 = np.where(variances.asnumpy() < 0.5)
                        variances[idx_1] = 0.9
                        variances[idx_2] = 1

                for batch in update_anchor_data:
                    anc_data = gluon.utils.split_and_load(batch[0],
                                                          ctx_list=[ctx[0]],
                                                          batch_axis=0)
                    anc_label = gluon.utils.split_and_load(batch[1],
                                                           ctx_list=[ctx[0]],
                                                           batch_axis=0)
                    with ag.pause():
                        anchor_feat, anchor_logit = net(anc_data[0], sess - 1)
                        anchor_feat = [anchor_feat]
                        anchor_logit = [anchor_logit]

        trainer = gluon.Trainer(net.collect_params(), optimizer,
                                {'learning_rate': lrs[sess], 'wd': opt.wd,
                                 'momentum': opt.momentum})
        metric = mx.metric.Accuracy()
        train_metric = mx.metric.Accuracy()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

        # ==========================================================================
        # all loss init
        if use_nme:
            def loss_fn_disG(f1, f2, weight):
                f1 = f1.reshape(anchor_num, -1)
                f2 = f2.reshape(anchor_num, -1)
                similar = mx.nd.sum(f1 * f2, 1)
                return (1 - similar) * weight

            digG_weight = opt.nme_weight

        if use_AL:
            if model_name == 'quick_cnn':
                AL_w = [120, 75, 120, 100, 50, 60, 90, 90]
                AL_weight = AL_w[sess - 1]
            else:
                AL_weight = opt.AL_weight
            if opt.ng_var:
                def l2lossVar(feat, anc, weight, var):
                    dim = feat.shape[1]
                    feat = feat.reshape(-1, dim)
                    anc = anc.reshape(-1, dim)
                    loss = mx.nd.square(feat - anc)
                    loss = loss * weight * var
                    return mx.nd.mean(loss, axis=0, exclude=True)

                loss_fn_AL = l2lossVar
            else:
                loss_fn_AL = gluon.loss.L2Loss(weight=AL_weight)

        if use_pdl:
            loss_fn_pdl = DistillationSoftmaxCrossEntropyLoss(
                temperature=temperature, hard_weight=0, weight=pdl_weight)
        if use_oce:
            loss_fn_oce = gluon.loss.SoftmaxCrossEntropyLoss(weight=oce_weight)
        if use_ng_max:
            # guard fixed: the max loss belongs to use_ng_max, not use_ng_min
            loss_fn_max = NG_Max_Loss(lmbd=max_weight, margin=0.5)
        if use_ng_min:
            min_loss = NG_Min_Loss(
                num_classes=opt.c_way,
                feature_size=feature_size,
                lmbd=min_weight,  # center weight = 0.01 in the paper
                ctx=ctx[0])
            min_loss.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx,
                                force_reinit=True)  # init the center matrix
            center_trainer = gluon.Trainer(
                min_loss.collect_params(), optimizer="sgd",
                optimizer_params={"learning_rate": opt.ng_min_lr})  # alpha=0.1 in the paper
        # ==========================================================================

        lr_decay_count = 0

        # dataloader
        if opt.cum and sess == 1:
            base_class = list(np.arange(init_class))
            joint_data = DATASET(train=True, fine_label=True, c_way=init_class,
                                 k_shot=500, fix_class=base_class, logger=logger)

        if sess == 0:
            base_class = list(np.arange(init_class))
            new_class = list(init_class + (np.arange(5)))
            base_data = DATASET(train=True, fine_label=True, c_way=init_class,
                                k_shot=500, fix_class=base_class, logger=logger)
            bc_val_data = DataLoader(DATASET(train=False, fine_label=True,
                                             fix_class=base_class, logger=logger),
                                     transform_test, 100, num_workers,
                                     shuffle=False)
            nc_val_data = DataLoader(DATASET(train=False, fine_label=True,
                                             fix_class=new_class,
                                             base_class=len(base_class),
                                             logger=logger),
                                     transform_test, 100, num_workers,
                                     shuffle=False)
        else:
            base_class = list(np.arange(init_class + (sess - 1) * 5))
            new_class = list(init_class + (sess - 1) * 5 + (np.arange(5)))
            train_data_nc = DATASET(train=True, fine_label=True, c_way=c_way,
                                    k_shot=k_shot, fix_class=new_class,
                                    base_class=len(base_class), logger=logger)
            bc_val_data = DataLoader(DATASET(train=False, fine_label=True,
                                             fix_class=base_class, logger=logger),
                                     transform_test, 100, num_workers,
                                     shuffle=False)
            nc_val_data = DataLoader(DATASET(train=False, fine_label=True,
                                             fix_class=new_class,
                                             base_class=len(base_class),
                                             logger=logger),
                                     transform_test, 100, num_workers,
                                     shuffle=False)

        if sess == 0:
            train_data = DataLoader(base_data, transform_train,
                                    min(batch_size, base_data.__len__()),
                                    num_workers, shuffle=True)
        else:
            if opt.cum:
                # cumulative: merge the base and novel datasets
                joint_data = merge_datasets(joint_data, train_data_nc)
                train_data = DataLoader(joint_data, transform_train,
                                        min(batch_size, joint_data.__len__()),
                                        num_workers, shuffle=True)
            elif opt.use_all_novel:
                # use all novel data
                if sess == 1:
                    novel_data = train_data_nc
                else:
                    novel_data = merge_datasets(novel_data, train_data_nc)
                train_data = DataLoader(novel_data, transform_train,
                                        min(batch_size, novel_data.__len__()),
                                        num_workers, shuffle=True)
            else:
                # basic method
                train_data = DataLoader(train_data_nc, transform_train,
                                        min(batch_size, train_data_nc.__len__()),
                                        num_workers, shuffle=True)

        for epoch in range(epochs[sess]):
            tic = time.time()
            train_metric.reset()
            metric.reset()
            train_loss, train_anchor_loss, train_oce_loss = 0, 0, 0
            train_disg_loss, train_pdl_loss, train_min_loss = 0, 0, 0
            train_max_loss = 0
            num_batch = len(train_data)

            if use_warmUp:
                lr = schedule(epoch)
                trainer.set_learning_rate(lr)
            else:
                lr = trainer.learning_rate
                if epoch == lr_decay_epoch[sess][lr_decay_count]:
                    trainer.set_learning_rate(trainer.learning_rate * lr_decay)
                    lr_decay_count += 1

            if sess != 0 and epoch < fix_epoch:
                fix_cnn = fix_conv
            else:
                fix_cnn = False

            for i, batch in enumerate(train_data):
                data = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                                  batch_axis=0)
                label = gluon.utils.split_and_load(batch[1], ctx_list=ctx,
                                                   batch_axis=0)

                all_loss = list()
                with ag.record():
                    output_feat, output = net(data[0], sess, fix_cnn)
                    output_feat = [output_feat]
                    output = [output]
                    loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
                    all_loss.extend(loss)

                    if use_nme:
                        anchor_h = [net(X, sess, fix_cnn)[0] for X in anc_data]
                        disg_loss = [loss_fn_disG(a_h, a, weight=digG_weight)
                                     for a_h, a in zip(anchor_h, anchor_feat)]
                        all_loss.extend(disg_loss)

                    if sess > 0 and use_ng_max:
                        max_loss = [loss_fn_max(feat, label, feature_size, epoch,
                                                sess, init_class)
                                    for feat, label in zip(output_feat, label)]
                        all_loss.extend(max_loss[0])

                    if sess > 0 and use_AL:
                        # anchor loss
                        anchor_h = [net(X, sess, fix_cnn)[0] for X in anc_data]
                        if opt.ng_var:
                            anchor_loss = [loss_fn_AL(anchor_h[0], anchor_feat[0],
                                                      AL_weight, variances)]
                        else:
                            anchor_loss = [loss_fn_AL(a_h, a)
                                           for a_h, a in zip(anchor_h, anchor_feat)]
                        all_loss.extend(anchor_loss)

                    if sess > 0 and use_ng_min:
                        loss_min = min_loss(output_feat[0], label[0])
                        all_loss.extend(loss_min)

                    if sess > 0 and use_pdl:
                        anchor_l = [net(X, sess, fix_cnn)[1] for X in anc_data]
                        anchor_l = [anchor_l[0][:, :60 + (sess - 1) * 5]]
                        soft_label = [mx.nd.softmax(
                            anchor_logit[0][:, :60 + (sess - 1) * 5] / temperature)]
                        pdl_loss = [loss_fn_pdl(a_h, a, soft_a)
                                    for a_h, a, soft_a in zip(anchor_l, anc_label,
                                                              soft_label)]
                        all_loss.extend(pdl_loss)

                    if sess > 0 and use_oce:
                        anchorp = [net(X, sess, fix_cnn)[1] for X in anc_data]
                        oce_Loss = [loss_fn_oce(ap, a)
                                    for ap, a in zip(anchorp, anc_label)]
                        all_loss.extend(oce_Loss)

                    all_loss = [nd.mean(l) for l in all_loss]

                ag.backward(all_loss)
                trainer.step(1, ignore_stale_grad=True)
                if use_ng_min:
                    center_trainer.step(opt.c_way * opt.k_shot)

                train_loss += sum([l.sum().asscalar() for l in loss])
                if sess > 0 and use_AL:
                    train_anchor_loss += sum([al.mean().asscalar()
                                              for al in anchor_loss])
                if sess > 0 and use_oce:
                    train_oce_loss += sum([al.mean().asscalar()
                                           for al in oce_Loss])
                if sess > 0 and use_nme:
                    train_disg_loss += sum([al.mean().asscalar()
                                            for al in disg_loss])
                if sess > 0 and use_pdl:
                    train_pdl_loss += sum([al.mean().asscalar()
                                           for al in pdl_loss])
                if sess > 0 and use_ng_min:
                    train_min_loss += sum([al.mean().asscalar()
                                           for al in loss_min])
                if sess > 0 and use_ng_max:
                    train_max_loss += sum([al.mean().asscalar()
                                           for al in max_loss[0]])
                train_metric.update(label, output)

            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            name, bc_val_acc = test(ctx, bc_val_data, net, sess)
            name, nc_val_acc = test(ctx, nc_val_data, net, sess)

            if epoch == 0:
                acc_dict[str(sess)] = list()
            acc_dict[str(sess)].append([bc_val_acc, nc_val_acc])

            if sess == 0:
                overall = bc_val_acc
            else:
                overall = (bc_val_acc * (init_class + (sess - 1) * 5) +
                           nc_val_acc * 5) / (init_class + sess * 5)

            logger.info('[Epoch %d] lr=%.4f train=%.4f | val(base)=%.4f '
                        'val(novel)=%.4f overall=%.4f | loss=%.8f anc loss=%.8f '
                        'pdl loss:%.8f oce loss: %.8f time: %.8f' %
                        (epoch, lr, acc, bc_val_acc, nc_val_acc, overall,
                         train_loss, train_anchor_loss / AL_weight,
                         train_pdl_loss / pdl_weight,
                         train_oce_loss / oce_weight, time.time() - tic))
            if use_nme:
                logger.info('digG loss:%.8f' % (train_disg_loss / digG_weight))
            if use_ng_min:
                logger.info('min_loss:%.8f' % (train_min_loss / min_weight))
            if use_ng_max:
                logger.info('max_loss:%.8f' % (train_max_loss / max_weight))

            if save_period and save_dir and (epoch + 1) % save_period == 0:
                net.save_parameters('%s/sess-%s-%d.params' %
                                    (save_dir, model_name, epoch))

        select = eval(select_best_method)
        best_e = select(acc_dict, sess)
        logger.info('best select : base: %f novel: %f ' %
                    (acc_dict[str(sess)][best_e][0],
                     acc_dict[str(sess)][best_e][1]))
        if use_AL and model_name == 'quick_cnn':
            reload_path = '%s/sess-%s-%d.params' % (save_dir, model_name, best_e)
            net.load_parameters(reload_path, ctx=context)
        all_best_e.append(best_e)
        reload_path = '%s/sess-%s-%d.params' % (save_dir, model_name, best_e)
        net.load_parameters(reload_path, ctx=context)

    with open('%s/acc_dict.json' % save_dir, 'w') as json_file:
        json.dump(acc_dict, json_file)
    plot_pr(acc_dict, sess, save_dir)
    plot_all_sess(acc_dict, save_dir, all_best_e)
def main(im_dir, ckpt_path, ctx, out_dir='result', write_images=True):
    east_model = east.EAST(nclass=2, text_scale=1024)
    ctx = mx.cpu() if ctx < 0 else mx.gpu()
    # east_model.hybridize()
    east_model.load_parameters(ckpt_path, ctx)
    east_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225])
    ])
    imlist = glob.glob1(im_dir, '*g')
    for i, im_name in enumerate(imlist):
        im_path = os.path.join(im_dir, im_name)
        start_time = time.time()
        im = cv2.imread(im_path)
        im_resized, (ratio_h, ratio_w) = resize_image(im, max_side_len=2048)
        timer = {'net': 0, 'restore': 0, 'nms': 0}
        start = time.time()
        im_resized = east_transform(
            mx.nd.array(np.array(im_resized).astype('float32')))
        f_score, f_geometry = east_model.forward(im_resized.expand_dims(axis=0))
        timer['net'] = time.time() - start

        score_map = f_score.asnumpy().transpose((0, 2, 3, 1))
        cv2.imwrite("score_map{}.png".format(i), score_map[0, :, :, 0] * 255)
        geo_map = f_geometry.asnumpy().transpose((0, 2, 3, 1))
        boxes, timer = detect(score_map, geo_map, timer)
        print('{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
            im_name, timer['net'] * 1000, timer['restore'] * 1000,
            timer['nms'] * 1000))

        if boxes is not None:
            boxes = boxes[:, :8].reshape((-1, 4, 2))
            boxes[:, :, 0] /= ratio_h
            boxes[:, :, 1] /= ratio_w

        duration = time.time() - start_time
        print('[timing] {}'.format(duration))

        # save to file
        if boxes is not None:
            res_file = os.path.join(
                out_dir, '{}.txt'.format(os.path.basename(im_path).split('.')[0]))
            print("num_Boxes:{}".format(len(boxes)))
            with open(res_file, 'w') as f:
                for box in boxes:
                    # to avoid submission errors
                    box = sort_poly(box.astype(np.int32))
                    # if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5:
                    #     continue
                    f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                        box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                        box[2, 0], box[2, 1], box[3, 0], box[3, 1]))
                    cv2.polylines(im[:, :, ::-1].astype(np.uint8),
                                  [box.astype(np.int32).reshape((-1, 1, 2))],
                                  True, color=(255, 255, 0), thickness=2)
        if write_images:
            img_path = os.path.join(out_dir, im_name)
            cv2.imwrite(img_path, im)
def main():
    mnist_train = datasets.FashionMNIST(train=True)
    X, y = mnist_train[0]
    #print('X ', X)
    #print('y ', y)
    print('X shape: ', X.shape, 'X dtype ', X.dtype, 'y:', y)

    text_labels = [
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot'
    ]
    X, y = mnist_train[0:10]

    # plot images
    # display.set_matplotlib_formats('svg')
    # _, figs = plt.subplots(1, X.shape[0], figsize=(15, 15))
    # for f, x, yi in zip(figs, X, y):
    #     # 3D->2D by removing the last channel dim
    #     f.imshow(x.reshape((28, 28)).asnumpy())
    #     ax = f.axes
    #     ax.set_title(text_labels[int(yi)])
    #     ax.title.set_fontsize(14)
    #     ax.get_xaxis().set_visible(False)
    #     ax.get_yaxis().set_visible(False)
    # plt.show()

    transformer = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(0.13, 0.31)])
    mnist_train = mnist_train.transform_first(transformer)

    batch_size = 64
    train_data = gluon.data.DataLoader(mnist_train, batch_size=batch_size,
                                       shuffle=True, num_workers=4)
    mnist_valid = gluon.data.vision.FashionMNIST(train=False)
    valid_data = gluon.data.DataLoader(mnist_valid.transform_first(transformer),
                                       batch_size=batch_size, num_workers=4)

    net = nn.Sequential()
    net.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Conv2D(channels=16, kernel_size=3, activation='relu'),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Flatten(),
            nn.Dense(120, activation='relu'),
            nn.Dense(84, activation='relu'),
            nn.Dense(10))
    net.initialize(init=init.Xavier())

    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer_opt = gluon.Trainer(net.collect_params(), 'sgd',
                                {'learning_rate': 0.1})

    for epoch in range(10):
        train_loss, train_acc, valid_acc = 0., 0., 0.
        tic = time.time()
        for data, label in train_data:
            # forward and backward
            with autograd.record():
                output = net(data)
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            # update parameters
            trainer_opt.step(batch_size)
            # calculate training metrics
            train_loss += loss.mean().asscalar()
            train_acc += acc(output, label)
        for data, label in valid_data:
            valid_acc += acc(net(data), label)
        print("epoch %d: loss %.3f, train acc %.3f, test acc %.3f, in %.1f sec" %
              (epoch, train_loss / len(train_data), train_acc / len(train_data),
               valid_acc / len(valid_data), time.time() - tic))

    net.save_parameters('net.params')
def test(model):
    if args.dataset == 'cifar10':
        transform_train = transforms.Compose([
            gcv_transforms.RandomCrop(32, pad=4),
            transforms.RandomFlipLeftRight(),
            transforms.ToTensor(),
            transforms.Normalize([0.4914, 0.4822, 0.4465],
                                 [0.2023, 0.1994, 0.2010])
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.4914, 0.4822, 0.4465],
                                 [0.2023, 0.1994, 0.2010])
        ])
        train_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True, last_batch='discard',
            num_workers=args.num_workers)
        val_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=args.num_workers)
    elif args.dataset == 'cifar100':
        transform_train = transforms.Compose([
            gcv_transforms.RandomCrop(32, pad=4),
            transforms.RandomFlipLeftRight(),
            transforms.ToTensor(),
            transforms.Normalize([0.5071, 0.4865, 0.4409],
                                 [0.2673, 0.2564, 0.2762])
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.5071, 0.4865, 0.4409],
                                 [0.2673, 0.2564, 0.2762])
        ])
        train_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR100(train=True).transform_first(transform_train),
            batch_size=batch_size, shuffle=True, last_batch='discard',
            num_workers=args.num_workers)
        val_data = gluon.data.DataLoader(
            gluon.data.vision.CIFAR100(train=False).transform_first(transform_test),
            batch_size=batch_size, shuffle=False, num_workers=args.num_workers)

    metric = mx.metric.Accuracy()
    metric.reset()
    for i, batch in enumerate(val_data):
        data = gluon.utils.split_and_load(batch[0], ctx_list=context, batch_axis=0)
        label = gluon.utils.split_and_load(batch[1], ctx_list=context, batch_axis=0)
        outputs = [model(X.astype(args.dtype, copy=False)) for X in data]
        metric.update(label, outputs)
    _, acc = metric.get()
    print('\nTest-set Accuracy: ', acc)
    return acc
def get_built_in_dataset(name, train=True, input_size=224, batch_size=256,
                         num_workers=32, shuffle=True, fine_label=False,
                         **kwargs):
    """Returns a built-in popular image classification dataset based on the
    provided string name ('cifar10', 'cifar100', 'mnist', 'imagenet').
    """
    logger.info(f'get_built_in_dataset {name}')
    name = name.lower()
    if name in ('cifar10', 'cifar'):
        import gluoncv.data.transforms as gcv_transforms
        if train:
            transform_split = transforms.Compose([
                gcv_transforms.RandomCrop(32, pad=4),
                transforms.RandomFlipLeftRight(),
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])
            ])
        else:
            transform_split = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])
            ])
        return gluon.data.vision.CIFAR10(train=train).transform_first(transform_split)
    elif name == 'cifar100':
        import gluoncv.data.transforms as gcv_transforms
        if train:
            transform_split = transforms.Compose([
                gcv_transforms.RandomCrop(32, pad=4),
                transforms.RandomFlipLeftRight(),
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])
            ])
        else:
            transform_split = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])
            ])
        return gluon.data.vision.CIFAR100(
            train=train, fine_label=fine_label).transform_first(transform_split)
    elif name == 'mnist':
        def transform(data, label):
            return nd.transpose(data.astype(np.float32), (2, 0, 1)) / 255, \
                   label.astype(np.float32)
        return gluon.data.vision.MNIST(train=train, transform=transform)
    elif name == 'fashionmnist':
        def transform(data, label):
            return nd.transpose(data.astype(np.float32), (2, 0, 1)) / 255, \
                   label.astype(np.float32)
        return gluon.data.vision.FashionMNIST(train=train, transform=transform)
    elif name == 'imagenet':
        print("Loading the imagenet from ~/.mxnet/imagenet/")
        # Please set up the ImageNet dataset following the tutorial from GluonCV
        if train:
            rec_file = '~/.mxnet/imagenet/train.rec'
            rec_file_idx = '~/.mxnet/imagenet/train.idx'
        else:
            rec_file = '~/.mxnet/imagenet/val.rec'
            rec_file_idx = '~/.mxnet/imagenet/val.idx'
        data_loader = get_data_rec(input_size, 0.875, rec_file, rec_file_idx,
                                   batch_size, num_workers, train,
                                   shuffle=shuffle, **kwargs)
        return data_loader
    else:
        raise NotImplementedError
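# Usage sketch: the CIFAR branches return Dataset objects, so they still need
# to be wrapped in a DataLoader (the batch size here is illustrative):
def _built_in_dataset_demo():
    dataset = get_built_in_dataset('cifar10', train=True)
    loader = gluon.data.DataLoader(dataset, batch_size=128, shuffle=True,
                                   num_workers=4)
    for x, y in loader:
        print(x.shape, y.shape)  # (128, 3, 32, 32) (128,)
        break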
def train():
    if config.restart_training:
        shutil.rmtree(config.output_dir, ignore_errors=True)
    if config.output_dir is None:
        config.output_dir = 'output'
    if not os.path.exists(config.output_dir):
        os.makedirs(config.output_dir)
    logger = setup_logger(os.path.join(config.output_dir, 'train_log'))
    logger.info('train with gpu %s and mxnet %s' % (config.gpu_id, mx.__version__))

    ctx = mx.gpu(config.gpu_id)
    # set the random seed
    mx.random.seed(2)
    mx.random.seed(2, ctx=ctx)

    train_transforms = transforms.Compose([
        transforms.RandomBrightness(0.5),
        transforms.ToTensor()
    ])
    train_dataset = ImageDataset(config.trainfile, (config.img_h, config.img_w),
                                 3, 80, config.alphabet, phase='train')
    train_data_loader = DataLoader(train_dataset.transform_first(train_transforms),
                                   config.train_batch_size, shuffle=True,
                                   last_batch='keep', num_workers=config.workers)
    test_dataset = ImageDataset(config.testfile, (config.img_h, config.img_w),
                                3, 80, config.alphabet, phase='test')
    test_data_loader = DataLoader(test_dataset.transform_first(transforms.ToTensor()),
                                  config.eval_batch_size, shuffle=True,
                                  last_batch='keep', num_workers=config.workers)

    net = CRNN(len(config.alphabet), hidden_size=config.nh)
    net.hybridize()
    if not config.restart_training and config.checkpoint != '':
        logger.info('load pretrained net from {}'.format(config.checkpoint))
        net.load_parameters(config.checkpoint, ctx=ctx)
    else:
        net.initialize(ctx=ctx)

    criterion = gluon.loss.CTCLoss()

    all_step = len(train_data_loader)
    logger.info('each epoch contains {} steps'.format(all_step))
    schedule = mx.lr_scheduler.FactorScheduler(step=config.lr_decay_step * all_step,
                                               factor=config.lr_decay,
                                               stop_factor_lr=config.end_lr)
    # schedule = mx.lr_scheduler.MultiFactorScheduler(
    #     step=[15 * all_step, 30 * all_step, 60 * all_step, 80 * all_step],
    #     factor=0.1)
    adam_optimizer = mx.optimizer.Adam(learning_rate=config.lr, lr_scheduler=schedule)
    trainer = gluon.Trainer(net.collect_params(), optimizer=adam_optimizer)

    sw = SummaryWriter(logdir=config.output_dir)
    for epoch in range(config.start_epoch, config.end_epoch):
        loss = .0
        train_acc = .0
        tick = time.time()
        cur_step = 0
        for i, (data, label) in enumerate(train_data_loader):
            data = data.as_in_context(ctx)
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data)
                loss_ctc = criterion(output, label)
            loss_ctc.backward()
            trainer.step(data.shape[0])

            loss_c = loss_ctc.mean()
            cur_step = epoch * all_step + i
            sw.add_scalar(tag='ctc_loss', value=loss_c.asscalar(),
                          global_step=cur_step // 2)
            sw.add_scalar(tag='lr', value=trainer.learning_rate,
                          global_step=cur_step // 2)
            loss += loss_c
            acc = accuracy(output, label, config.alphabet)
            train_acc += acc
            if (i + 1) % config.display_interval == 0:
                acc /= len(label)
                sw.add_scalar(tag='train_acc', value=acc, global_step=cur_step)
                batch_time = time.time() - tick
                logger.info(
                    '[{}/{}], [{}/{}], step: {}, Speed: {:.3f} samples/sec, '
                    'ctc loss: {:.4f}, acc: {:.4f}, lr: {}, time: {:.4f} s'.format(
                        epoch, config.end_epoch, i, all_step, cur_step,
                        config.display_interval * config.train_batch_size / batch_time,
                        loss.asscalar() / config.display_interval, acc,
                        trainer.learning_rate, batch_time))
                loss = .0
                tick = time.time()
                nd.waitall()
        if epoch == 0:
            sw.add_graph(net)
        logger.info('start val ...')
        train_acc /= len(train_dataset)
        validation_accuracy = evaluate_accuracy(
            net, test_data_loader, ctx, config.alphabet) / len(test_dataset)
        sw.add_scalar(tag='val_acc', value=validation_accuracy, global_step=cur_step)
        logger.info("Epoch {}, train_acc {:.4f}, val_acc {:.4f}".format(
            epoch, train_acc, validation_accuracy))
        net.save_parameters("{}/{}_{:.4f}_{:.4f}.params".format(
            config.output_dir, epoch, train_acc, validation_accuracy))
    sw.close()
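# Side note on the FactorScheduler used above: a standalone sketch of how the
# learning rate is multiplied by `factor` every `step` updates, floored at
# stop_factor_lr (the values here are illustrative, not from the config).
import mxnet as mx

sched = mx.lr_scheduler.FactorScheduler(step=100, factor=0.5,
                                        stop_factor_lr=1e-4, base_lr=0.01)
for num_update in (1, 101, 201, 2001):
    print(num_update, sched(num_update))  # halves every 100 updates, floored at 1e-4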
def train_mnist():
    mnist_train = get_train_data()
    print("type(mnist_train)=", type(mnist_train))

    transformer = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(0.13, 0.31)])
    mnist_train = mnist_train.transform_first(transformer)

    batch_size = 256
    train_data = gluon.data.DataLoader(
        mnist_train, batch_size=batch_size, shuffle=True, num_workers=4)

    mnist_valid = gluon.data.vision.FashionMNIST(train=False)
    valid_data = gluon.data.DataLoader(
        mnist_valid.transform_first(transformer),
        batch_size=batch_size, num_workers=4)

    # for data, label in train_data:
    #     print(data.shape, label.shape)
    #     break

    net = nn.Sequential()
    net.add(nn.Conv2D(channels=6, kernel_size=5, activation='relu'),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Conv2D(channels=16, kernel_size=3, activation='relu'),
            nn.MaxPool2D(pool_size=2, strides=2),
            nn.Flatten(),
            nn.Dense(120, activation="relu"),
            nn.Dense(84, activation="relu"),
            nn.Dense(10))
    net.initialize(init=init.Xavier())

    softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

    for epoch in range(10):
        train_loss, train_acc, valid_acc = 0., 0., 0.
        tic = time.time()
        for data, label in train_data:
            # forward + backward
            with autograd.record():
                output = net(data)
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            # update parameters
            trainer.step(batch_size)
            # calculate training metrics
            train_loss += loss.mean().asscalar()
            train_acc += acc(output, label)
        # calculate validation accuracy
        for data, label in valid_data:
            valid_acc += acc(net(data), label)
        print("Epoch %d: loss %.3f, train acc %.3f, test acc %.3f, in %.1f sec" % (
            epoch, train_loss / len(train_data), train_acc / len(train_data),
            valid_acc / len(valid_data), time.time() - tic))
        net.save_parameters('./output_weight/net_%s.params' % (epoch))
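# `acc` is not defined in this snippet; a minimal sketch of the usual Gluon
# tutorial helper it appears to be (assumption: output is (N, C) scores and
# label is (N,)):
def acc(output, label):
    # fraction of samples whose highest-scoring class matches the label
    return (output.argmax(axis=1) == label.astype('float32')).mean().asscalar()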
def __call__(self, x=None):
    x = T.Resize((self.w, self.h))(x)
    x = T.ToTensor()(x)
    x = T.Normalize(mean=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225))(x)
    return x
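# Standalone sketch of the same Resize -> ToTensor -> Normalize pipeline on a
# dummy image (assumes self.w=100 and self.h=32 purely for illustration):
import mxnet as mx
import mxnet.gluon.data.vision.transforms as T

img = mx.nd.random.uniform(0, 255, (64, 200, 3)).astype('uint8')  # HWC uint8
pipeline = T.Compose([
    T.Resize((100, 32)),  # (w, h), matching the __call__ above
    T.ToTensor(),         # HWC uint8 -> CHW float32 scaled to [0, 1]
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])
print(pipeline(img).shape)  # (3, 32, 100)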
import numpy as np
import mxnet as mx
from mxnet import nd
import time
from mxnet.gluon.data.vision import transforms

gpu_total_time = 0.0
transformer = transforms.ToTensor()
count = 1000
for i in range(count):
    image = mx.nd.random.uniform(0, 255, (1, 512, 512, 3),
                                 ctx=mx.gpu(0)).astype(np.uint8)
    tic = time.time()
    # image.as_in_context(mx.gpu(0))
    res = transformer(image)
    # To force the calculation
    # a = res.shape
    res.wait_to_read()
    tac = time.time()
    gpu_total_time += (tac - tic) * 1000
print("Total time for GPU ToTensor - ", gpu_total_time)
print("Average time per ToTensor 1,512,512,3 - ", gpu_total_time / count)

cpu_total_time = 0.0
transformer = transforms.ToTensor()
count = 1000
for i in range(count):
    image = mx.nd.random.uniform(0, 255, (1, 512, 512, 3)).astype(np.uint8)
    # The original snippet is cut off here; the CPU loop mirrors the GPU one:
    tic = time.time()
    res = transformer(image)
    res.wait_to_read()
    tac = time.time()
    cpu_total_time += (tac - tic) * 1000
print("Total time for CPU ToTensor - ", cpu_total_time)
print("Average time per ToTensor 1,512,512,3 - ", cpu_total_time / count)
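# Why wait_to_read() matters in the timing loops above: MXNet executes
# operators asynchronously, so without a synchronization point the timer would
# only measure how fast ops are queued. An equivalent whole-engine barrier:
out = transformer(image)
mx.nd.waitall()  # blocks until every pending operation has completed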
from mxnet.gluon.data import DataLoader, Dataset
from mxnet import nd
from mxnet.image import imread
import os
import numpy as np
import mxnet as mx
import mxnet.gluon.data.vision.transforms as T
from collections import Counter

normalize = T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

default_transform = T.Compose([
    T.Resize(256),
    T.RandomResizedCrop(size=224, scale=(1.0, 1.0), ratio=(1.0, 1.0)),  # just crop, no scaling
    T.RandomFlipLeftRight(),
    T.ToTensor(),  # last, to swap the layout to (c, h, w)
    normalize
])

test_transform = T.Compose(
    [T.Resize(256), T.CenterCrop(224), T.ToTensor(), normalize])


class DeepInClassFashion(Dataset):
    """The DeepInClassFashion dataset. Reads data from list_item_inshop.txt."""

    def __init__(self,
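# The class body above is cut off after __init__; for orientation, a minimal
# hypothetical Dataset showing what transform_first expects (reuses the
# imports at the top of this snippet):
class MinimalImageList(Dataset):
    def __init__(self, items):
        self._items = items  # list of (image_path, label) pairs

    def __getitem__(self, idx):
        path, label = self._items[idx]
        # transform_first applies the transform only to this first element
        return imread(path), label

    def __len__(self):
        return len(self._items)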
def __init__(self, args, logger):
    self.args = args
    self.logger = logger
    # image transform
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])
    # dataset and dataloader
    data_kwargs = {
        'transform': input_transform,
        'base_size': args.base_size,
        'crop_size': args.crop_size
    }
    trainset = get_segmentation_dataset(args.dataset, split=args.train_split,
                                        mode='train', **data_kwargs)
    valset = get_segmentation_dataset(args.dataset, split='val', mode='val',
                                      **data_kwargs)
    self.train_data = gluon.data.DataLoader(trainset, args.batch_size,
                                            shuffle=True, last_batch='rollover',
                                            num_workers=args.workers)
    self.eval_data = gluon.data.DataLoader(valset, args.test_batch_size,
                                           last_batch='rollover',
                                           num_workers=args.workers)
    # create network
    if args.model_zoo is not None:
        model = get_model(args.model_zoo, norm_layer=args.norm_layer,
                          norm_kwargs=args.norm_kwargs, aux=args.aux,
                          base_size=args.base_size, crop_size=args.crop_size,
                          pretrained=args.pretrained)
    else:
        model = get_segmentation_model(model=args.model, dataset=args.dataset,
                                       backbone=args.backbone,
                                       norm_layer=args.norm_layer,
                                       norm_kwargs=args.norm_kwargs,
                                       aux=args.aux, base_size=args.base_size,
                                       crop_size=args.crop_size)

    # for resnest use only
    from gluoncv.nn.dropblock import set_drop_prob
    from functools import partial
    apply_drop_prob = partial(set_drop_prob, 0.0)
    model.apply(apply_drop_prob)

    model.cast(args.dtype)
    logger.info(model)
    self.net = DataParallelModel(model, args.ctx, args.syncbn)
    self.evaluator = DataParallelModel(SegEvalModel(model), args.ctx)

    # resume checkpoint if needed
    if args.resume is not None:
        if os.path.isfile(args.resume):
            model.load_parameters(args.resume, ctx=args.ctx)
        else:
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))

    # create criterion
    if 'icnet' in args.model:
        criterion = ICNetLoss(crop_size=args.crop_size)
    elif 'danet' in args.model or (args.model_zoo and 'danet' in args.model_zoo):
        criterion = SegmentationMultiLosses()
    else:
        criterion = MixSoftmaxCrossEntropyLoss(args.aux, aux_weight=args.aux_weight)
    self.criterion = DataParallelCriterion(criterion, args.ctx, args.syncbn)

    # optimizer and lr scheduling
    self.lr_scheduler = LRSequential([
        LRScheduler('linear', base_lr=0, target_lr=args.lr,
                    nepochs=args.warmup_epochs,
                    iters_per_epoch=len(self.train_data)),
        LRScheduler(mode='poly', base_lr=args.lr,
                    nepochs=args.epochs - args.warmup_epochs,
                    iters_per_epoch=len(self.train_data), power=0.9)
    ])
    kv = mx.kv.create(args.kvstore)

    if args.optimizer == 'sgd':
        optimizer_params = {
            'lr_scheduler': self.lr_scheduler,
            'wd': args.weight_decay,
            'momentum': args.momentum,
            'learning_rate': args.lr
        }
    elif args.optimizer == 'adam':
        optimizer_params = {
            'lr_scheduler': self.lr_scheduler,
            'wd': args.weight_decay,
            'learning_rate': args.lr
        }
    else:
        raise ValueError('Unsupported optimizer {} used'.format(args.optimizer))

    if args.dtype == 'float16':
        optimizer_params['multi_precision'] = True

    if args.no_wd:
        for k, v in self.net.module.collect_params('.*beta|.*gamma|.*bias').items():
            v.wd_mult = 0.0

    self.optimizer = gluon.Trainer(self.net.module.collect_params(),
                                   args.optimizer, optimizer_params, kvstore=kv)
    # evaluation metrics
    self.metric = gluoncv.utils.metrics.SegmentationMetric(trainset.num_class)
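# The warmup + poly schedule built above, shown standalone (assumes gluoncv's
# LRScheduler/LRSequential; the epoch counts and learning rate here are
# illustrative, not from args):
from gluoncv.utils import LRScheduler, LRSequential

sched = LRSequential([
    LRScheduler('linear', base_lr=0, target_lr=0.01,
                nepochs=5, iters_per_epoch=100),  # linear warmup over 500 iters
    LRScheduler('poly', base_lr=0.01,
                nepochs=45, iters_per_epoch=100, power=0.9),
])
print(sched(0), sched(250), sched(500))  # ramps from 0 toward 0.01, then decays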
def test(args):
    # output folder
    outdir = 'outdir'
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    # image transform
    input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
    ])
    # dataset and dataloader
    if args.eval:
        testset = get_segmentation_dataset(args.dataset, split='val',
                                           mode='testval',
                                           transform=input_transform)
        total_inter, total_union, total_correct, total_label = \
            np.int64(0), np.int64(0), np.int64(0), np.int64(0)
    else:
        testset = get_segmentation_dataset(args.dataset, split='test',
                                           mode='test',
                                           transform=input_transform)
    test_data = gluon.data.DataLoader(testset, args.test_batch_size,
                                      last_batch='keep',
                                      batchify_fn=ms_batchify_fn,
                                      num_workers=args.workers)
    # create network
    if args.model_zoo is not None:
        model = get_model(args.model_zoo, pretrained=True)
    else:
        model = get_segmentation_model(model=args.model, dataset=args.dataset,
                                       ctx=args.ctx, backbone=args.backbone,
                                       norm_layer=args.norm_layer)
        # load pretrained weight
        assert args.resume is not None, '=> Please provide the checkpoint using --resume'
        if os.path.isfile(args.resume):
            model.load_params(args.resume, ctx=args.ctx)
        else:
            raise RuntimeError("=> no checkpoint found at '{}'".format(args.resume))
    print(model)
    evaluator = MultiEvalModel(model, testset.num_class, ctx_list=args.ctx)

    tbar = tqdm(test_data)
    for i, (data, dsts) in enumerate(tbar):
        if args.eval:
            targets = dsts
            predicts = evaluator.parallel_forward(data)
            for predict, target in zip(predicts, targets):
                target = target.as_in_context(predict[0].context)
                correct, labeled = batch_pix_accuracy(predict[0], target)
                inter, union = batch_intersection_union(predict[0], target,
                                                        testset.num_class)
                total_correct += correct.astype('int64')
                total_label += labeled.astype('int64')
                total_inter += inter.astype('int64')
                total_union += union.astype('int64')
            pixAcc = np.float64(1.0) * total_correct / (
                np.spacing(1, dtype=np.float64) + total_label)
            IoU = np.float64(1.0) * total_inter / (
                np.spacing(1, dtype=np.float64) + total_union)
            mIoU = IoU.mean()
            tbar.set_description('pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))
        else:
            im_paths = dsts
            predicts = evaluator.parallel_forward(data)
            for predict, impath in zip(predicts, im_paths):
                predict = mx.nd.squeeze(mx.nd.argmax(predict[0], 1)).asnumpy() \
                    + testset.pred_offset
                mask = get_color_pallete(predict, args.dataset)
                outname = os.path.splitext(impath)[0] + '.png'
                mask.save(os.path.join(outdir, outname))
def train():
    logging.info('Start Training for Task: %s\n' % (task))

    # Initialize the net from a pretrained model
    pretrained_net = gluon.model_zoo.vision.get_model(model_name, pretrained=True)
    finetune_net = gluon.model_zoo.vision.get_model(model_name, classes=task_num_class)
    finetune_net.features = pretrained_net.features
    finetune_net.output.initialize(init.Xavier(), ctx=ctx)
    finetune_net.collect_params().reset_ctx(ctx)
    finetune_net.hybridize()

    # Carefully set the 'scale' parameter to enable multi-scale training and
    # multi-scale testing
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(448, scale=(0.76, 1.0), ratio=(0.999, 1.001)),
        transforms.RandomFlipLeftRight(),
        transforms.RandomBrightness(0.20),
        # transforms.RandomColorJitter(brightness=jitter_param, contrast=jitter_param,
        #                              saturation=jitter_param),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    train_dataset = my_customdataset.my_custom_dataset(
        imgroot=os.path.join(".", 'train_valid_allset', task, 'train'),
        labelmasterpath='label_master.csv')
    train_data = gluon.data.DataLoader(
        train_dataset.transform_first(train_transform),
        batch_size=batch_size, shuffle=True,
        num_workers=num_workers, last_batch='discard')

    val_transform = transforms.Compose([
        transforms.Resize(480),
        transforms.CenterCrop(448),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    val_dataset = my_customdataset.my_custom_dataset(
        imgroot=os.path.join(".", 'train_valid_allset', task, 'val'),
        labelmasterpath='label_master.csv')
    val_data = gluon.data.DataLoader(
        val_dataset.transform_first(val_transform),
        batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # Define the Trainer; use Adam to make the model converge quickly
    trainer = gluon.Trainer(finetune_net.collect_params(), 'adam',
                            {'learning_rate': lr})
    metric = mx.metric.Accuracy()
    L = gluon.loss.SoftmaxCrossEntropyLoss()
    lr_counter = 0
    num_batch = len(train_data)

    # Start training
    best_AP = 0
    best_acc = 0
    for epoch in range(epochs):
        train_acc = 0.
        # Load the best model when moving to the next training stage
        if epoch == lr_steps[lr_counter]:
            finetune_net.collect_params().load(best_path, ctx=ctx)
            trainer.set_learning_rate(trainer.learning_rate * lr_factor)
            lr_counter += 1
        tic = time.time()
        train_loss = 0
        metric.reset()
        AP = 0.
        AP_cnt = 0

        for i, batch in enumerate(train_data):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                              batch_axis=0, even_split=False)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx,
                                               batch_axis=0, even_split=False)
            with ag.record():
                outputs = [finetune_net(X) for X in data]
                loss = []
                # Handle the 'maybe' labels with a soft-softmax: the first
                # column is the hard label, and up to three extra columns
                # (terminated by the sentinel value 99) are mixed in with
                # fixed weights.
                for yhat, y in zip(outputs[0], label[0]):
                    if y[1] == 99:    # only a hard label, e.g. [4, 99, ...]
                        loss_1 = L(yhat, y[0])
                    elif y[2] == 99:  # one extra label, e.g. [4, 1, 99, ...]
                        loss_1 = 0.8 * L(yhat, y[0]) + 0.2 * L(yhat, y[1])
                    elif y[3] == 99:  # two extra labels, e.g. [4, 1, 3, 99, ...]
                        loss_1 = 0.7 * L(yhat, y[0]) + 0.15 * L(yhat, y[1]) \
                            + 0.15 * L(yhat, y[2])
                    else:             # three extra labels, e.g. [4, 1, 3, 2, ...]
                        loss_1 = 0.6 * L(yhat, y[0]) + 0.13 * L(yhat, y[1]) \
                            + 0.13 * L(yhat, y[2]) + 0.13 * L(yhat, y[3])
                    loss += [loss_1]
                # loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
            # for l in loss:
            #     l.backward()
            ag.backward(loss)  # for soft-softmax
            trainer.step(batch_size)
            train_loss += sum([l.mean().asscalar() for l in loss]) / len(loss)
            # train_acc += accuracy(outputs, label)
            metric.update([label[0][:, 0]], outputs)
            # ap, cnt = calculate_ap(label, outputs)
            # AP += ap
            # AP_cnt += cnt
            # progressbar(i, num_batch - 1)

        # train_map = AP / AP_cnt
        _, train_acc = metric.get()
        train_loss /= num_batch
        val_acc, val_loss = validate(finetune_net, val_data, ctx)

        logging.info(
            '[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f, loss: %.3f | '
            'time: %.1f | learning_rate %.6f'
            % (epoch, train_acc, train_loss, val_acc, val_loss,
               time.time() - tic, trainer.learning_rate))
        f_val.writelines(
            '[Epoch %d] Train-acc: %.3f, loss: %.3f | Val-acc: %.3f, loss: %.3f | '
            'time: %.1f | learning_rate %.6f\n'
            % (epoch, train_acc, train_loss, val_acc, val_loss,
               time.time() - tic, trainer.learning_rate))

        # Save the best model of each stage
        if val_acc > best_acc:
            # best_AP = this_AP
            best_acc = val_acc
            if not os.path.exists(os.path.join('.', 'models')):
                os.makedirs(os.path.join('.', 'models'))
            best_path = os.path.join(
                '.', 'models',
                '%s_%s_%s_%s_staging.params' % (task, model_name, epoch, best_acc))
            if not os.path.exists(best_path):
                f = open(best_path, 'w')
                f.close()
            finetune_net.collect_params().save(best_path)

    logging.info('\n')
    finetune_net.collect_params().load(best_path, ctx=ctx)
    f_val.writelines(
        'Best val acc is: [Epoch %d] Train-acc: %.3f, loss: %.3f | '
        'Best-val-acc: %.3f, loss: %.3f | time: %.1f | learning_rate %.6f\n'
        % (epoch, train_acc, train_loss, best_acc, val_loss,
           time.time() - tic, trainer.learning_rate))
    return finetune_net
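# The sentinel-99 branching above can be read as a small weight table; a
# hedged, equivalent table-driven rewrite (hypothetical helper, not from the
# original code):
SOFT_WEIGHTS = {1: [1.0], 2: [0.8, 0.2], 3: [0.7, 0.15, 0.15],
                4: [0.6, 0.13, 0.13, 0.13]}

def soft_softmax_loss(L, yhat, y):
    # count the label slots used before the sentinel value 99 (at most 4)
    n = 1
    while n < 4 and int(y[n].asscalar()) != 99:
        n += 1
    # mix the per-label cross-entropy losses with the fixed weights
    return sum(w * L(yhat, y[k]) for k, w in enumerate(SOFT_WEIGHTS[n]))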
plot_name = opt.save_plot_dir

logging_handlers = [logging.StreamHandler()]
if opt.logging_dir:
    logging_dir = opt.logging_dir
    makedirs(logging_dir)
    logging_handlers.append(logging.FileHandler(
        '%s/train_cifar10_%s.log' % (logging_dir, model_name)))

logging.basicConfig(level=logging.INFO, handlers=logging_handlers)
logging.info(opt)

transform_train = transforms.Compose([
    gcv_transforms.RandomCrop(32, pad=4),
    transforms.RandomFlipLeftRight(),
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465],
                         [0.2023, 0.1994, 0.2010])
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.4914, 0.4822, 0.4465],
                         [0.2023, 0.1994, 0.2010])
])


def label_transform(label, classes):
    # one-hot encode an integer label vector of shape (N,) into (N, classes)
    ind = label.astype('int')
    res = nd.zeros((ind.shape[0], classes), ctx=label.context)
    res[nd.arange(ind.shape[0], ctx=label.context), ind] = 1
    return res


def test(ctx, val_data):
def main():
    opt = parse_args()

    batch_size = opt.batch_size
    classes = 10

    log_dir = os.path.join(opt.save_dir, "logs")
    model_dir = os.path.join(opt.save_dir, "params")
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # Init dataloader
    jitter_param = 0.4
    transform_train = transforms.Compose([
        gcv_transforms.RandomCrop(32, pad=4),
        transforms.RandomFlipLeftRight(),
        transforms.RandomBrightness(jitter_param),
        transforms.RandomColorJitter(jitter_param),
        transforms.RandomContrast(jitter_param),
        transforms.RandomSaturation(jitter_param),
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.4914, 0.4822, 0.4465],
                             [0.2023, 0.1994, 0.2010])
    ])
    train_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=True).transform_first(transform_train),
        batch_size=batch_size, shuffle=True, last_batch='discard',
        num_workers=opt.num_workers)
    val_data = gluon.data.DataLoader(
        gluon.data.vision.CIFAR10(train=False).transform_first(transform_test),
        batch_size=batch_size, shuffle=False, num_workers=opt.num_workers)

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    lr_decay = opt.lr_decay
    lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')] + [np.inf]

    model_name = opt.model
    if model_name.startswith('cifar_wideresnet'):
        kwargs = {'classes': classes, 'drop_rate': opt.drop_rate}
    else:
        kwargs = {'classes': classes}
    net = get_model(model_name, **kwargs)
    if opt.resume_from:
        net.load_parameters(opt.resume_from, ctx=context)
    optimizer = 'nag'

    save_period = opt.save_period
    if opt.save_dir and save_period:
        save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_period = 0

    def test(ctx, val_loader):
        metric = mx.metric.Accuracy()
        for i, batch in enumerate(val_loader):
            data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
            label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
            outputs = [net(X) for X in data]
            metric.update(label, outputs)
        return metric.get()

    def train(train_data, val_data, epochs, ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]
        net.hybridize()
        net.initialize(mx.init.Xavier(), ctx=ctx)
        net.forward(mx.nd.ones((1, 3, 30, 30), ctx=ctx[0]))
        with SummaryWriter(logdir=log_dir, verbose=False) as sw:
            sw.add_graph(net)

        trainer = gluon.Trainer(net.collect_params(), optimizer, {
            'learning_rate': opt.lr,
            'wd': opt.wd,
            'momentum': opt.momentum
        })
        metric = mx.metric.Accuracy()
        train_metric = mx.metric.Accuracy()
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()

        iteration = 0
        lr_decay_count = 0
        best_val_score = 0
        global_step = 0

        for epoch in range(epochs):
            tic = time.time()
            train_metric.reset()
            metric.reset()
            train_loss = 0
            num_batch = len(train_data)
            alpha = 1

            if epoch == lr_decay_epoch[lr_decay_count]:
                trainer.set_learning_rate(trainer.learning_rate * lr_decay)
                lr_decay_count += 1

            tbar = tqdm(train_data)
            for i, batch in enumerate(tbar):
                data = gluon.utils.split_and_load(batch[0], ctx_list=ctx,
                                                  batch_axis=0)
                label = gluon.utils.split_and_load(batch[1], ctx_list=ctx,
                                                   batch_axis=0)
                with ag.record():
                    output = [net(X) for X in data]
                    loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]
                for l in loss:
                    l.backward()
                trainer.step(batch_size)
                train_loss += sum([l.sum().asscalar() for l in loss])
                train_metric.update(label, output)
                name, acc = train_metric.get()
                iteration += 1
                global_step += len(loss)

            train_loss /= batch_size * num_batch
            name, acc = train_metric.get()
            name, val_acc = test(ctx, val_data)
            if val_acc > best_val_score:
                best_val_score = val_acc
                net.save_parameters('{}/{}-{}-{:04.3f}-best.params'.format(
                    model_dir, model_name, epoch, best_val_score))

            with SummaryWriter(logdir=log_dir, verbose=False) as sw:
                sw.add_scalar(tag="TrainLoss", value=train_loss,
                              global_step=global_step)
                sw.add_scalar(tag="TrainAcc", value=acc, global_step=global_step)
                sw.add_scalar(tag="ValAcc", value=val_acc, global_step=global_step)
                sw.add_graph(net)

            logging.info('[Epoch %d] train=%f val=%f loss=%f time: %f' %
                         (epoch, acc, val_acc, train_loss, time.time() - tic))

            if save_period and save_dir and (epoch + 1) % save_period == 0:
                net.save_parameters('{}/{}-{}.params'.format(
                    save_dir, model_name, epoch))

        if save_period and save_dir:
            net.save_parameters('{}/{}-{}.params'.format(
                save_dir, model_name, epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize()
    train(train_data, val_data, opt.num_epochs, context)
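# parse_args is not shown in this snippet; a hedged sketch collecting the
# options main() actually consumes (only the attribute names come from the
# code above; the defaults are illustrative assumptions):
import argparse

def parse_args():
    p = argparse.ArgumentParser(description='CIFAR-10 training')
    p.add_argument('--batch-size', dest='batch_size', type=int, default=128)
    p.add_argument('--num-workers', dest='num_workers', type=int, default=4)
    p.add_argument('--num-gpus', dest='num_gpus', type=int, default=1)
    p.add_argument('--num-epochs', dest='num_epochs', type=int, default=200)
    p.add_argument('--lr', type=float, default=0.1)
    p.add_argument('--wd', type=float, default=1e-4)
    p.add_argument('--momentum', type=float, default=0.9)
    p.add_argument('--lr-decay', dest='lr_decay', type=float, default=0.1)
    p.add_argument('--lr-decay-epoch', dest='lr_decay_epoch', default='100,150')
    p.add_argument('--model', default='cifar_resnet20_v1')
    p.add_argument('--drop-rate', dest='drop_rate', type=float, default=0.0)
    p.add_argument('--mode', default='hybrid')
    p.add_argument('--save-dir', dest='save_dir', default='./results')
    p.add_argument('--save-period', dest='save_period', type=int, default=0)
    p.add_argument('--resume-from', dest='resume_from', default='')
    return p.parse_args()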