import mxnet as mx
from mxnet.gluon import nn
from gluoncv2.model_provider import get_model as glcv2_get_model


def get_pnasnet5large(units, load=None):
    net = glcv2_get_model('pnasnet5large', pretrained=True)
    # Replace the pretrained 1000-class head with a fresh `units`-way classifier.
    with net.name_scope():
        in_channels = 4320
        net.output = nn.HybridSequential(prefix='')
        net.output.add(
            nn.Flatten(),
            nn.Dropout(rate=0.5),
            nn.Dense(units=units, in_units=in_channels))
    net.output.initialize(mx.init.Xavier())
    if load is not None:
        net.load_parameters(load)
    net.hybridize(static_alloc=True, static_shape=True)
    return net
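
# A minimal usage sketch (added): build a 10-class fine-tuning head and run a
# dummy forward pass. The 10-class head and random input are illustrative;
# 331x331 is the standard PNASNet-5-Large input resolution.
if __name__ == '__main__':
    net = get_pnasnet5large(units=10)
    x = mx.nd.random.uniform(shape=(1, 3, 331, 331))
    y = net(x)
    print(y.shape)  # expected: (1, 10)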
import numpy as np
import mxnet as mx
from gluon2keras import gluon2keras  # converter import path assumed


def check_error(gluon_output, k_model, input_np, epsilon=1e-5):  # signature reconstructed; tolerance default assumed
    """
    Compare outputs of the Gluon model and the converted Keras model.

    :return: difference
    """
    gluon_output = gluon_output.asnumpy()
    keras_output = k_model.predict(input_np)
    error = np.max(np.abs(gluon_output - keras_output))  # compare by absolute difference
    print('Error:', error)
    assert error < epsilon
    return error


if __name__ == '__main__':
    print('Test xception...')

    # Get a model from gluon cv
    from gluoncv2.model_provider import get_model as glcv2_get_model
    net = glcv2_get_model("xception")

    # Make sure it's hybrid and initialized
    net.hybridize()
    net.collect_params().initialize()

    # Test input
    input_np = np.random.uniform(0, 1, (1, 3, 299, 299))
    gluon_output = net(mx.nd.array(input_np))

    # Keras model
    k_model = gluon2keras(net, [(1, 3, 299, 299)], verbose=True, names='short')

    error = check_error(gluon_output, k_model, input_np)
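
    # Extension sketch (added): repeat the comparison over a few fresh random
    # inputs so agreement does not rest on a single draw; the loop count of 5
    # is arbitrary.
    for _ in range(5):
        input_np = np.random.uniform(0, 1, (1, 3, 299, 299))
        gluon_output = net(mx.nd.array(input_np))
        check_error(gluon_output, k_model, input_np)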
def main(): """ Main body of script. """ args = parse_args() # Load a testing image: image = cv2.imread(args.image, flags=cv2.IMREAD_COLOR) # cv2.imshow("image", image) # cv2.waitKey(0) # cv2.destroyAllWindows() image = cv2.cvtColor(image, code=cv2.COLOR_BGR2RGB) # Resize image with keeping aspect ratio: resize_value = int(math.ceil(float(args.input_size) / args.resize_inv_factor)) h, w = image.shape[:2] if not ((w == resize_value and w <= h) or (h == resize_value and h <= w)): resize_size = (resize_value, int(resize_value * h / w)) if w < h else (int(resize_value * w / h), resize_value) image = cv2.resize(image, dsize=resize_size, interpolation=cv2.INTER_LINEAR) # Center crop of the image: h, w = image.shape[:2] th, tw = args.input_size, args.input_size ih = int(round(0.5 * (h - th))) jw = int(round(0.5 * (w - tw))) image = image[ih:(ih + th), jw:(jw + tw), :] # cv2.imshow("image2", image) # cv2.waitKey(0) # cv2.destroyAllWindows() # Convert image to a float tensor and normalize it: x = image.astype(np.float32) x = x / 255.0 x = (x - np.array(args.mean_rgb)) / np.array(args.std_rgb) # Create MXNet context: mx_ctx = [mx.gpu(i) for i in range(args.num_gpus)] if args.num_gpus > 0 else [mx.cpu()] # Convert the tensor to a MXNet nd-array: x = x.transpose(2, 0, 1) x = np.expand_dims(x, axis=0) x = mx.nd.array(x, ctx=mx.cpu()) # Create model with loading pretrained weights: net = glcv2_get_model(args.model, pretrained=True, ctx=mx_ctx) # Evaluate the network: y = net(x) probs = mx.nd.softmax(y) # Show results: top_k = 5 probs_np = probs.asnumpy().squeeze(axis=0) top_k_inds = probs_np.argsort()[::-1][:top_k] classes = ImageNet1kAttr().classes print("The input picture is classified to be:") for k in range(top_k): print("{idx}: [{class_name}], with probability {prob:.3f}.".format( idx=(k + 1), class_name=classes[top_k_inds[k]], prob=probs_np[top_k_inds[k]]))
import os
import time
import logging
import numpy as np
import tqdm
import wandb
import mxnet as mx
from mxnet import gluon, nd, autograd as ag
from mxnet.contrib import amp
from mxnet.gluon.data.vision import transforms
from mxnet.gluon.data.vision import ImageRecordDataset
import gluoncv as gcv
from gluoncv.data import ImageNet
from gluoncv.data.dataloader import RandomTransformDataLoader
from gluoncv.model_zoo import get_model as glcv_get_model
from gluoncv.utils import makedirs, LRScheduler, LRSequential
from gluoncv2.model_provider import get_model as glcv2_get_model


def main():
    opt = parse_args()

    filehandler = logging.FileHandler(opt.logging_file, mode='a+')
    # streamhandler = logging.StreamHandler()
    logger = logging.getLogger('ImageNet')
    logger.setLevel(level=logging.DEBUG)
    logger.addHandler(filehandler)
    # logger.addHandler(streamhandler)
    logger.info(opt)

    if opt.amp:
        amp.init()

    batch_size = opt.batch_size
    classes = 1000
    num_training_samples = 1281167
    num_validating_samples = 50000

    num_gpus = opt.num_gpus
    batch_size *= max(1, num_gpus)
    context = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
    num_workers = opt.num_workers
    accumulate = opt.accumulate

    lr_decay = opt.lr_decay
    lr_decay_period = opt.lr_decay_period
    if opt.lr_decay_period > 0:
        lr_decay_epoch = list(range(lr_decay_period, opt.num_epochs, lr_decay_period))
    else:
        lr_decay_epoch = [int(i) for i in opt.lr_decay_epoch.split(',')]
    lr_decay_epoch = [e - opt.warmup_epochs for e in lr_decay_epoch]
    num_batches = num_training_samples // batch_size

    lr_scheduler = LRSequential([
        LRScheduler('linear', base_lr=0, target_lr=opt.lr,
                    nepochs=opt.warmup_epochs, iters_per_epoch=num_batches),
        LRScheduler(opt.lr_mode, base_lr=opt.lr, target_lr=0,
                    nepochs=opt.num_epochs - opt.warmup_epochs,
                    iters_per_epoch=num_batches,
                    step_epoch=lr_decay_epoch,
                    step_factor=lr_decay, power=2)
    ])

    model_name = opt.model

    kwargs = {'ctx': context, 'pretrained': opt.use_pretrained}
    if opt.use_gn:
        kwargs['norm_layer'] = gcv.nn.GroupNorm
    if model_name.startswith('vgg'):
        kwargs['batch_norm'] = opt.batch_norm
    elif model_name.startswith('resnext'):
        kwargs['use_se'] = opt.use_se
    if opt.last_gamma:
        kwargs['last_gamma'] = True

    optimizer = 'sgd'
    optimizer_params = {
        'wd': opt.wd,
        'momentum': opt.momentum,
        'lr_scheduler': lr_scheduler,
        'begin_num_update': num_batches * opt.resume_epoch
    }
    # if opt.dtype != 'float32':
    #     optimizer_params['multi_precision'] = True

    # net = get_model(model_name, **kwargs)
    if opt.model_backend == 'gluoncv':
        net = glcv_get_model(model_name, **kwargs)
    elif opt.model_backend == 'gluoncv2':
        net = glcv2_get_model(model_name, **kwargs)
    else:
        raise ValueError(f'Unknown backend: {opt.model_backend}')
    # net.cast(opt.dtype)
    if opt.resume_params != '':
        net.load_parameters(opt.resume_params, ctx=context, cast_dtype=True)

    # teacher model for distillation training
    if opt.teacher is not None and opt.hard_weight < 1.0:
        teacher_name = opt.teacher
        if opt.teacher_backend == 'gluoncv':
            teacher = glcv_get_model(teacher_name, **kwargs)
        elif opt.teacher_backend == 'gluoncv2':
            teacher = glcv2_get_model(teacher_name, **kwargs)
        else:
            raise ValueError(f'Unknown backend: {opt.teacher_backend}')
        # teacher = glcv2_get_model(teacher_name, pretrained=True, ctx=context)
        # teacher.cast(opt.dtype)
        teacher.collect_params().setattr('grad_req', 'null')
        distillation = True
    else:
        distillation = False

    # Two functions for reading data from record file or raw images
    def get_data_rec(rec_train, rec_val):
        rec_train = os.path.expanduser(rec_train)
        rec_val = os.path.expanduser(rec_val)
        # mean_rgb = [123.68, 116.779, 103.939]
        # std_rgb = [58.393, 57.12, 57.375]
        train_dataset = ImageRecordDataset(filename=rec_train, flag=1)
        val_dataset = ImageRecordDataset(filename=rec_val, flag=1)
        return train_dataset, val_dataset

    def get_data_loader(data_dir):
        train_dataset = ImageNet(data_dir, train=True)
        val_dataset = ImageNet(data_dir, train=False)
        return train_dataset, val_dataset

    def batch_fn(batch, ctx):
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        return data, label
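
    # Note on the two pipelines above: packed .rec record files give faster
    # sequential reads than millions of small files, while the raw-image path
    # expects the standard ImageNet directory layout used by gluoncv's
    # ImageNet dataset.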
    if opt.use_rec:
        train_dataset, val_dataset = get_data_rec(opt.rec_train, opt.rec_val)
    else:
        train_dataset, val_dataset = get_data_loader(opt.data_dir)

    normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    jitter_param = 0.4
    lighting_param = 0.1

    if not opt.multi_scale:
        train_dataset = train_dataset.transform_first(transforms.Compose([
            transforms.RandomResizedCrop(opt.input_size),
            transforms.RandomFlipLeftRight(),
            transforms.RandomColorJitter(brightness=jitter_param,
                                         contrast=jitter_param,
                                         saturation=jitter_param),
            transforms.RandomLighting(lighting_param),
            transforms.ToTensor(),
            normalize
        ]))
        train_data = gluon.data.DataLoader(train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,
                                           pin_memory=True,
                                           last_batch='rollover',
                                           num_workers=num_workers)
    else:
        train_data = RandomTransformDataLoader(
            [Transform(transforms.Compose([
                # transforms.RandomResizedCrop(opt.input_size),
                transforms.RandomResizedCrop(x * 32),
                transforms.RandomFlipLeftRight(),
                transforms.RandomColorJitter(brightness=jitter_param,
                                             contrast=jitter_param,
                                             saturation=jitter_param),
                transforms.RandomLighting(lighting_param),
                transforms.ToTensor(),
                normalize
            ])) for x in range(10, 20)],
            train_dataset,
            interval=10 * opt.accumulate,
            batch_size=batch_size,
            shuffle=False,
            pin_memory=True,
            last_batch='rollover',
            num_workers=num_workers)

    val_dataset = val_dataset.transform_first(transforms.Compose([
        transforms.Resize(opt.input_size, keep_ratio=True),
        transforms.CenterCrop(opt.input_size),
        transforms.ToTensor(),
        normalize
    ]))
    val_data = gluon.data.DataLoader(val_dataset,
                                     batch_size=batch_size,
                                     shuffle=False,
                                     pin_memory=True,
                                     last_batch='keep',
                                     num_workers=num_workers)

    if opt.mixup:
        train_metric = mx.metric.RMSE()
    else:
        train_metric = mx.metric.Accuracy()
    train_loss_metric = mx.metric.Loss()
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)

    save_frequency = opt.save_frequency
    if opt.save_dir and save_frequency:
        if opt.wandb:
            save_dir = wandb.run.dir
        else:
            save_dir = opt.save_dir
        makedirs(save_dir)
    else:
        save_dir = ''
        save_frequency = 0

    def mixup_transform(label, classes, lam=1, eta=0.0):
        if isinstance(label, nd.NDArray):
            label = [label]
        res = []
        for l in label:
            y1 = l.one_hot(classes,
                           on_value=1 - eta + eta / classes,
                           off_value=eta / classes)
            y2 = l[::-1].one_hot(classes,
                                 on_value=1 - eta + eta / classes,
                                 off_value=eta / classes)
            res.append(lam * y1 + (1 - lam) * y2)
        return res

    def smooth(label, classes, eta=0.1):
        if isinstance(label, nd.NDArray):
            label = [label]
        smoothed = []
        for l in label:
            res = l.one_hot(classes,
                            on_value=1 - eta + eta / classes,
                            off_value=eta / classes)
            smoothed.append(res)
        return smoothed

    def test(ctx, val_data):
        acc_top1.reset()
        acc_top5.reset()
        for i, batch in tqdm.tqdm(enumerate(val_data),
                                  desc='Validating',
                                  total=num_validating_samples // batch_size):
            data, label = batch_fn(batch, ctx)
            # outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
            outputs = [net(X) for X in data]
            acc_top1.update(label, outputs)
            acc_top5.update(label, outputs)
        _, top1 = acc_top1.get()
        _, top5 = acc_top5.get()
        return 1 - top1, 1 - top5

    def train(ctx):
        if isinstance(ctx, mx.Context):
            ctx = [ctx]

        if opt.resume_params == '':
            import warnings
            with warnings.catch_warnings(record=True) as w:
                net.initialize(mx.init.MSRAPrelu(), ctx=ctx)

        if opt.no_wd:
            for k, v in net.collect_params('.*beta|.*gamma|.*bias').items():
                v.wd_mult = 0.0

        if accumulate > 1:
            logger.info(f'accumulate: {accumulate}, using "add" grad_req')
            import warnings
            with warnings.catch_warnings(record=True) as w:
                net.collect_params().setattr('grad_req', 'add')

        trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params,
                                update_on_kvstore=False if opt.amp else None)
        if opt.amp:
            amp.init_trainer(trainer)
        if opt.resume_states != '':
            trainer.load_states(opt.resume_states)

        if opt.label_smoothing or opt.mixup:
            sparse_label_loss = False
        else:
            sparse_label_loss = True
        if distillation:
            L = gcv.loss.DistillationSoftmaxCrossEntropyLoss(
                temperature=opt.temperature,
                hard_weight=opt.hard_weight,
                sparse_label=sparse_label_loss)
        else:
            L = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=sparse_label_loss)

        best_val_score = 1

        err_top1_val, err_top5_val = test(ctx, val_data)
        logger.info('initial validation: err-top1=%f err-top5=%f' %
                    (err_top1_val, err_top5_val))

        for epoch in range(opt.resume_epoch, opt.num_epochs):
            tic = time.time()
            train_metric.reset()
            train_loss_metric.reset()
            btic = time.time()

            pbar = tqdm.tqdm(total=num_batches, desc=f'Training [{epoch}]', leave=True)
            for i, batch in enumerate(train_data):
                data, label = batch_fn(batch, ctx)

                if opt.mixup:
                    lam = np.random.beta(opt.mixup_alpha, opt.mixup_alpha)
                    if epoch >= opt.num_epochs - opt.mixup_off_epoch:
                        lam = 1
                    data = [lam * X + (1 - lam) * X[::-1] for X in data]
                    if opt.label_smoothing:
                        eta = 0.1
                    else:
                        eta = 0.0
                    label = mixup_transform(label, classes, lam, eta)
                elif opt.label_smoothing:
                    hard_label = label
                    label = smooth(label, classes)

                if distillation:
                    # teacher_prob = [nd.softmax(teacher(X.astype(opt.dtype, copy=False)) / opt.temperature)
                    #                 for X in data]
                    with ag.predict_mode():
                        teacher_prob = [
                            nd.softmax(
                                teacher(
                                    nd.transpose(
                                        nd.image.resize(
                                            nd.transpose(X, (0, 2, 3, 1)),
                                            size=opt.teacher_imgsize),
                                        (0, 3, 1, 2))) / opt.temperature)
                            for X in data]

                with ag.record():
                    # outputs = [net(X.astype(opt.dtype, copy=False)) for X in data]
                    outputs = [net(X) for X in data]
                    if distillation:
                        # loss = [L(yhat.astype('float32', copy=False),
                        #           y.astype('float32', copy=False),
                        #           p.astype('float32', copy=False)) for yhat, y, p in zip(outputs, label, teacher_prob)]
                        # print([outputs, label, teacher_prob])
                        loss = [L(yhat, y, p) for yhat, y, p in
                                zip(outputs, label, teacher_prob)]
                    else:
                        # loss = [L(yhat, y.astype(opt.dtype, copy=False)) for yhat, y in zip(outputs, label)]
                        loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
                if opt.amp:
                    with amp.scale_loss(loss, trainer) as scaled_loss:
                        ag.backward(scaled_loss)
                else:
                    ag.backward(loss)

                if accumulate > 1:
                    if (i + 1) % accumulate == 0:
                        trainer.step(batch_size * accumulate)
                        net.collect_params().zero_grad()
                else:
                    trainer.step(batch_size)

                train_loss_metric.update(0, loss)
                if opt.mixup:
                    output_softmax = [nd.SoftmaxActivation(out.astype('float32', copy=False))
                                      for out in outputs]
                    train_metric.update(label, output_softmax)
                else:
                    if opt.label_smoothing:
                        train_metric.update(hard_label, outputs)
                    else:
                        train_metric.update(label, outputs)

                _, loss_score = train_loss_metric.get()
                train_metric_name, train_metric_score = train_metric.get()
                samples_per_sec = batch_size / (time.time() - btic)
                postfix = f'{samples_per_sec:.1f} imgs/sec, ' \
                          f'loss: {loss_score:.4f}, ' \
                          f'acc: {train_metric_score * 100:.2f}, ' \
                          f'lr: {trainer.learning_rate:.4e}'
                if opt.multi_scale:
                    postfix += f', size: {data[0].shape[-1]}'
                pbar.set_postfix_str(postfix)
                pbar.update()
                btic = time.time()

                if opt.log_interval and not (i + 1) % opt.log_interval:
                    step = epoch * num_batches + i
                    wandb.log({'samples_per_sec': samples_per_sec,
                               train_metric_name: train_metric_score,
                               'lr': trainer.learning_rate,
                               'loss': loss_score},
                              step=step)
                    logger.info(
                        'Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f\tlr=%f' %
                        (epoch, i, samples_per_sec, train_metric_name,
                         train_metric_score, trainer.learning_rate))

            pbar.close()
            train_metric_name, train_metric_score = train_metric.get()
            throughput = int(batch_size * i / (time.time() - tic))

            err_top1_val, err_top5_val = test(ctx, val_data)
            wandb.log({'err1': err_top1_val, 'err5': err_top5_val},
                      step=epoch * num_batches)

            logger.info('[Epoch %d] training: %s=%f' %
                        (epoch, train_metric_name, train_metric_score))
            logger.info('[Epoch %d] speed: %d samples/sec\ttime cost: %f' %
                        (epoch, throughput, time.time() - tic))
            logger.info('[Epoch %d] validation: err-top1=%f err-top5=%f' %
                        (epoch, err_top1_val, err_top5_val))

            if err_top1_val < best_val_score:
                best_val_score = err_top1_val
                net.save_parameters('%s/%.4f-imagenet-%s-%d-best.params' %
                                    (save_dir, best_val_score, model_name, epoch))
                trainer.save_states('%s/%.4f-imagenet-%s-%d-best.states' %
                                    (save_dir, best_val_score, model_name, epoch))

            if save_frequency and save_dir and (epoch + 1) % save_frequency == 0:
                net.save_parameters('%s/imagenet-%s-%d.params' %
                                    (save_dir, model_name, epoch))
                trainer.save_states('%s/imagenet-%s-%d.states' %
                                    (save_dir, model_name, epoch))

        if save_frequency and save_dir:
            net.save_parameters('%s/imagenet-%s-%d.params' %
                                (save_dir, model_name, opt.num_epochs - 1))
            trainer.save_states('%s/imagenet-%s-%d.states' %
                                (save_dir, model_name, opt.num_epochs - 1))

    if opt.mode == 'hybrid':
        net.hybridize(static_alloc=True, static_shape=not opt.multi_scale)
        if distillation:
            teacher.hybridize(static_alloc=True, static_shape=not opt.multi_scale)
    train(context)
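
# Standalone sanity-check sketch (added) for the warmup + decay schedule built
# in main(). It assumes gluoncv's LRScheduler/LRSequential expose MXNet's
# callable scheduler interface; the toy numbers (0.1 peak LR, 5 warmup epochs,
# 100 iterations per epoch) are illustrative only.
def _print_lr_schedule():
    sched = LRSequential([
        LRScheduler('linear', base_lr=0, target_lr=0.1,
                    nepochs=5, iters_per_epoch=100),
        LRScheduler('cosine', base_lr=0.1, target_lr=0,
                    nepochs=95, iters_per_epoch=100)
    ])
    for num_update in (0, 250, 500, 5000, 9999):
        # LR ramps linearly over the first 500 updates, then decays to 0.
        print(num_update, sched(num_update))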
import numpy as np
import mxnet as mx
from gluon2keras import gluon2keras  # converter import path assumed


def check_error(gluon_output, k_model, input_np, epsilon=1e-5):  # signature reconstructed; tolerance default assumed
    """
    Compare outputs of the Gluon model and the converted Keras model.

    :return: difference
    """
    gluon_output = gluon_output.asnumpy()
    keras_output = k_model.predict(input_np)
    error = np.max(np.abs(gluon_output - keras_output))  # compare by absolute difference
    print('Error:', error)
    assert error < epsilon
    return error


if __name__ == '__main__':
    print('Test resnet10...')

    # Get a model from gluon cv
    from gluoncv2.model_provider import get_model as glcv2_get_model
    net = glcv2_get_model("resnet10")

    # Make sure it's hybrid and initialized
    net.hybridize()
    net.collect_params().initialize()

    # Test input
    input_np = np.random.uniform(0, 1, (1, 3, 224, 224))
    gluon_output = net(mx.nd.array(input_np))

    # Keras model
    k_model = gluon2keras(net, [(1, 3, 224, 224)], verbose=True, names='short')

    error = check_error(gluon_output, k_model, input_np)
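
# Refactor sketch (added): this resnet10 test and the xception test are
# identical up to the model name and input resolution, so both could share one
# helper. `run_conversion_test` is a hypothetical name, not part of either
# original script.
def run_conversion_test(model_name, input_shape, epsilon=1e-5):
    from gluoncv2.model_provider import get_model as glcv2_get_model
    net = glcv2_get_model(model_name)
    net.hybridize()
    net.collect_params().initialize()
    input_np = np.random.uniform(0, 1, input_shape)
    gluon_output = net(mx.nd.array(input_np))
    k_model = gluon2keras(net, [input_shape], verbose=True, names='short')
    return check_error(gluon_output, k_model, input_np, epsilon)

# Example calls:
# run_conversion_test('resnet10', (1, 3, 224, 224))
# run_conversion_test('xception', (1, 3, 299, 299))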