def build_backbone_via_backend(backend='gluoncv2',
                               name='resnet18',
                               pretrained=True,
                               features=("stage2_resunit1_relu0_fwd",
                                         "stage3_resunit1_relu0_fwd",
                                         "stage4_resunit1_relu0_fwd"),
                               ctx=mx.cpu(0),
                               **kwargs):
    """Build a model-zoo network and wrap it in a ``FeatureExtractor``.

    Parameters
    ----------
    backend : str
        ``'gluoncv2'`` or ``'gluoncv'``; picks the getter used to build the
        network.
    name : str
        Model name. Must be in ``gcv2_model_list`` or ``get_model_list()``.
    pretrained : bool or str
        A string is treated as the path of a parameter file that is loaded
        into a network built with ``pretrained=False``. Any other value is
        forwarded to the getter unchanged.
    features : sequence of str
        Forwarded to ``FeatureExtractor``.
    ctx
        Context forwarded to the getter and to ``load_parameters``.
    **kwargs
        Extra keyword arguments forwarded to the getter.

    Returns
    -------
    FeatureExtractor
        The wrapped network, fed by a float32 symbol named ``data``.

    Raises
    ------
    ValueError
        If ``backend`` is not one of the two supported values.
    AssertionError
        If ``name`` is in neither model list, or ``pretrained`` is a string
        that is not an existing file.
    """
    if backend not in ('gluoncv2', 'gluoncv'):
        raise ValueError(
            f'Unknown backend: {backend}, supported: gluoncv, gluoncv2')
    getter = glcv2_get_model if backend == 'gluoncv2' else glcv_get_model

    assert name in gcv2_model_list or name in get_model_list(
    ), f'{name} not in model list'

    weights_from_file = isinstance(pretrained, str)
    if weights_from_file:
        # read pretrained weight from path
        assert os.path.isfile(pretrained)

    net = getter(name,
                 pretrained=False if weights_from_file else pretrained,
                 ctx=ctx,
                 **kwargs)

    if weights_from_file:
        logging.info(f'load pretrained weight from {pretrained}')
        net.load_parameters(pretrained,
                            ctx=ctx,
                            allow_missing=True,
                            ignore_extra=True)

    data_inputs = [mx.sym.var('data', dtype='float32')]
    return FeatureExtractor(net, features, data_inputs)
def _get_supported_models():
    """Return the ``get_model_list()`` names that contain no excluded keyword.

    A name is dropped when any keyword below occurs in it as a substring.
    NOTE(review): the keywords look like detection/segmentation/pose/video
    model families and dataset suffixes, leaving classification models;
    confirm against the model zoo.

    Returns
    -------
    list of str
        The surviving names, in the order ``get_model_list()`` yields them.
    """
    excluded_keywords = [
        'ssd', 'faster_rcnn', 'mask_rcnn', 'fcn', 'deeplab', 'psp', 'icnet',
        'fastscnn', 'danet', 'yolo', 'pose', 'center_net', 'siamrpn',
        'monodepth', 'ucf101', 'kinetics', 'voc', 'coco', 'citys', 'mhpv1',
        'ade', 'hmdb51', 'sthsth', 'otb',
    ]

    def _is_supported(model_name):
        # Substring match: one hit anywhere in the name rejects it.
        for keyword in excluded_keywords:
            if keyword in model_name:
                return False
        return True

    return list(filter(_is_supported, get_model_list()))
def get_scratch_model(self, model_name, nmbrofclasses):
    """Build a non-pretrained model-zoo network.

    Parameters
    ----------
    model_name : str
        Name looked up in ``model_zoo.get_model_list()``.
    nmbrofclasses : int
        Forwarded to ``model_zoo.get_model`` as ``classes``.

    Returns
    -------
    The network built with ``pretrained=False``, or ``None`` (after printing
    a message) when ``model_name`` is not in the model list.
    """
    if model_name not in model_zoo.get_model_list():
        print("Model not found, Please refer to the defined models")
        return None
    return model_zoo.get_model(model_name,
                               pretrained=False,
                               classes=nmbrofclasses)
def get_model_zoo_weights(self, path, model_name):
    """Fetch a pretrained network from the model zoo by name.

    Parameters
    ----------
    path
        Not used by this method.
    model_name
        Name looked up in ``model_zoo.get_model_list()``; converted with
        ``str`` before being passed to ``model_zoo.get_model``.

    Returns
    -------
    The network built with ``pretrained=True``, or ``None`` when the name is
    unknown (the list of known names is printed in that case).
    """
    known_models = model_zoo.get_model_list()
    if model_name in known_models:
        return model_zoo.get_model(str(model_name), pretrained=True)

    print(
        "the model name is not found, please choose a model name from the list below "
    )
    print(known_models)
    return None
def _get_supported_models():
    """Collect the model names offered by whichever backends are importable.

    With ``mxnet`` importable, the gluoncv model list is filtered by the
    keyword list below (substring match). With ``timm`` importable, every
    name from ``timm.list_models()`` is appended. If ``timm`` cannot be
    imported a warning is logged; its wording depends on whether ``torch``
    imported.

    Returns
    -------
    list of str
        gluoncv names first (possibly none), then timm names (possibly none).
    """
    supported = []

    # gluoncv backend: only consulted when mxnet itself can be imported
    try:
        import mxnet as _mxnet
    except ImportError:
        _mxnet = None
    if _mxnet is not None:
        from gluoncv.model_zoo import get_model_list
        excluded_keywords = (
            'ssd', 'faster_rcnn', 'mask_rcnn', 'fcn', 'deeplab', 'psp',
            'icnet', 'fastscnn', 'danet', 'yolo', 'pose', 'center_net',
            'siamrpn', 'monodepth', 'ucf101', 'kinetics', 'voc', 'coco',
            'citys', 'mhpv1', 'ade', 'hmdb51', 'sthsth', 'otb',
        )
        for model_name in get_model_list():
            if not any(keyword in model_name for keyword in excluded_keywords):
                supported.append(model_name)

    # add timm backend supported models
    try:
        import torch as _torch
    except ImportError:
        _torch = None
    try:
        import timm as _timm
    except ImportError:
        _timm = None

    if _timm is None:
        # The torch import result only decides which warning is emitted.
        if _torch is None:
            logger.warning('timm installed but torch is required to enable it.')
        else:
            logger.warning(
                'cannot import timm, possibly due to missing torchvision')
        return supported

    supported.extend(_timm.list_models())
    return supported
def parse_args():
    """Parse the command-line options of the quantization simulator.

    Reads ``sys.argv`` through ``argparse``. With ``--list-models`` every
    name from ``get_model_list()`` is printed and the process exits with
    status 0. Otherwise ``--model`` is mandatory: when it is missing the
    parser reports the error and exits with status 2 instead of carrying on
    with ``model=None``. On success all parsed settings are echoed to stdout
    between two banner lines of equal width.

    Returns
    -------
    argparse.Namespace
        The parsed options.

    Raises
    ------
    SystemExit
        After ``--list-models``, on a missing ``--model``, or on any
        argparse usage error.
    """
    parser = argparse.ArgumentParser(description='Simulate for quantization.')
    # parser.add_argument('--data-dir', type=str, default='~/.mxnet/datasets',
    #                     help='training and validation pictures to use. (default: ~/.mxnet/datasets)')
    parser.add_argument(
        '--model', type=str, default=None,
        help='type of model to use. see vision_model for options. (required)')
    parser.add_argument('--print-model', action='store_true',
                        help='print the architecture of model.')
    parser.add_argument('--list-models', action='store_true',
                        help='list all models supported for --model.')
    parser.add_argument('--use-gpu', type=int, default=-1,
                        help='run model on gpu. (default: cpu)')
    parser.add_argument('--dataset', type=str, default="imagenet",
                        choices=['imagenet', 'cifar10'],
                        help='dataset to evaluate (default: imagenet)')
    parser.add_argument('--use-gn', action='store_true',
                        help='whether to use group norm.')
    parser.add_argument(
        '--batch-norm', action='store_true',
        help='enable batch normalization or not in vgg. default is false.')
    parser.add_argument(
        '--use-se', action='store_true',
        help='use SE layers or not in resnext. default is false.')
    parser.add_argument(
        '--last-gamma', action='store_true',
        help='whether to init gamma of the last BN layer in each bottleneck to 0.')
    parser.add_argument(
        '--merge-bn', action='store_true',
        help='merge batchnorm into convolution or not. (default: False)')
    parser.add_argument('--weight-bits-width', type=int, default=8,
                        help='bits width of weight to quantize into.')
    parser.add_argument(
        '--input-signed', type=str, default="false",
        help='quantize inputs into int(true) or uint(false). (default: false)')
    parser.add_argument('--input-bits-width', type=int, default=8,
                        help='bits width of input to quantize into.')
    parser.add_argument(
        '--quant-type', type=str, default="layer",
        choices=['layer', 'group', 'channel'],
        help='quantize weights on layer/group/channel. (default: layer)')
    parser.add_argument('-j', '--num-data-workers', dest='num_workers',
                        default=4, type=int,
                        help='number of preprocessing workers (default: 4)')
    parser.add_argument(
        '--batch-size', type=int, default=128,
        help='evaluate batch size per device (CPU/GPU). (default: 128)')
    parser.add_argument(
        '--num-sample', type=int, default=5,
        help='number of samples for every class in trainset. (default: 5)')
    parser.add_argument(
        '--quantize-input-offline', action='store_true',
        help='calibrate via EMA on trainset and quantize input offline.')
    parser.add_argument('--calib-mode', type=str, default="naive",
                        choices=['naive', 'kl'],
                        help='how to calibrate inputs. (default: naive)')
    parser.add_argument(
        '--calib-epoch', type=int, default=3,
        help='number of epochs to calibrate via EMA on trainset. (default: 3)')
    parser.add_argument(
        '--disable-cudnn-autotune', action='store_true',
        help='disable mxnet cudnn autotune to find the best convolution algorithm.')
    parser.add_argument('--eval-per-calib', action='store_true',
                        help='evaluate once after every calibration.')
    parser.add_argument(
        '--exclude-first-conv', type=str, default="true",
        choices=['false', 'true'],
        help='exclude first convolution layer when quantize. (default: true)')
    parser.add_argument(
        '--fixed-random-seed', type=int, default=7,
        help='set random_seed for numpy to provide reproducibility. (default: 7)')
    parser.add_argument(
        '--wino_quantize', type=str, default="none",
        choices=['none', 'F23', 'F43', 'F63'],
        help='quantize weights for Conv2D in Winograd domain (default: none)')
    opt = parser.parse_args()

    if opt.list_models:
        for key in get_model_list():
            print(key)
        # parser.exit raises SystemExit without relying on the `exit`
        # builtin, which only exists when the `site` module is loaded.
        parser.exit(0)
    elif opt.model is None:
        # The option is documented as required; stop here rather than
        # returning a namespace whose model is None.
        parser.error("--model is required")

    banner_title = ' Settings '
    print()
    print('*' * 25 + banner_title + '*' * 25)
    for k, v in vars(opt).items():
        print("{0: <25}: {1}".format(k, v))
    # Footer width is derived from the same title so both banners line up.
    print('*' * (25 * 2 + len(banner_title)))
    print()
    return opt
def _train_image_classification(args, reporter):
    """
    Run one image-classification trial, or reload the best trial for the final fit.

    Parameters
    ----------
    args: <class 'autogluon.utils.edict.EasyDict'>
        Trial configuration. The keys ``train_data``, ``val_data`` and
        ``wall_clock_tick`` are required. ``task_id``, ``problem_type``,
        ``final_fit``, ``log_dir``, ``exp_batch_size``, ``task``, ``dataset``,
        ``num_trials``, ``custom_net``, ``model`` and ``custom_optimizer``
        are read when present. The mapping is copied before modification.
    reporter
        Forwarded to the estimator constructor.

    Returns
    -------
    dict
        * If the deadline ``wall_clock_tick`` has already passed (and this is
          not the final fit), or any exception is raised while training or
          loading: a dict with ``traceback``, ``args``, ``time``,
          ``train_acc`` = -1 and ``valid_acc`` = -1.
        * Otherwise the result of ``estimator.fit`` (or, for the final fit,
          the stored summary of the best trial), extended with
          ``model_checkpoint`` and ``estimator`` when an estimator exists.

    Raises
    ------
    ValueError
        If ``model`` is given but is in no available model zoo.
    AssertionError
        If neither ``custom_net`` nor ``model`` selects a backend.
    """
    tic = time.time()
    args = args.copy()
    try:
        task_id = int(args['task_id'])
    except (KeyError, TypeError, ValueError):
        # missing or non-numeric task id: fall back to trial 0
        task_id = 0
    problem_type = args.pop('problem_type', MULTICLASS)
    final_fit = args.pop('final_fit', False)
    # train, val data
    train_data = args.pop('train_data')
    val_data = args.pop('val_data')
    # wall clock tick limit
    wall_clock_tick = args.pop('wall_clock_tick')
    log_dir = args.pop('log_dir', os.getcwd())
    # exponential batch size for Int() space batch sizes
    exp_batch_size = args.pop('exp_batch_size', False)
    if exp_batch_size and 'batch_size' in args:
        args['batch_size'] = 2 ** args['batch_size']
    try:
        task = args.pop('task')
        dataset = args.pop('dataset')
        num_trials = args.pop('num_trials')
    except KeyError:
        # any of the three missing disables the per-trial json dump below
        task = None

    # mxnet and torch dispatcher
    dispatcher = None
    torch_model_list = None
    mxnet_model_list = None
    custom_net = None
    if args.get('custom_net', None):
        custom_net = args.get('custom_net')
        if torch and timm:
            if isinstance(custom_net, torch.nn.Module):
                dispatcher = 'torch'
        if mx:
            if isinstance(custom_net, mx.gluon.Block):
                dispatcher = 'mxnet'
    else:
        if torch and timm:
            torch_model_list = timm.list_models()
        if mx:
            mxnet_model_list = list(get_model_list())
        model = args.get('model', None)
        if model:
            # timm model has higher priority
            if torch_model_list and model in torch_model_list:
                dispatcher = 'torch'
            elif mxnet_model_list and model in mxnet_model_list:
                dispatcher = 'mxnet'
            else:
                if not torch_model_list:
                    raise ValueError('Model not found in gluoncv model zoo. Install torch and timm if it supports the model.')
                elif not mxnet_model_list:
                    raise ValueError('Model not found in timm model zoo. Install mxnet if it supports the model.')
                else:
                    raise ValueError('Model not supported because it does not exist in both timm and gluoncv model zoo.')
    assert dispatcher in ('torch', 'mxnet'), 'custom net needs to be of type either torch.nn.Module or mx.gluon.Block'
    args['estimator'] = TorchImageClassificationEstimator if dispatcher == 'torch' else ImageClassificationEstimator
    # convert user defined config to nested form
    args = config_to_nested(args)

    if wall_clock_tick < tic and not final_fit:
        return {'traceback': 'timeout', 'args': str(args),
                'time': 0, 'train_acc': -1, 'valid_acc': -1}

    try:
        valid_summary_file = 'fit_summary_img_cls.ag'
        estimator_cls = args.pop('estimator', None)
        assert estimator_cls in (ImageClassificationEstimator, TorchImageClassificationEstimator)
        if final_fit:
            # load from previous dumps
            estimator = None
            # Bound up front so the timeout branch below can always update it,
            # even when log_dir does not exist.
            result = {}
            if os.path.isdir(log_dir):
                trial_dirs = [d for d in os.listdir(log_dir)
                              if d.startswith('.trial_') and os.path.isdir(os.path.join(log_dir, d))]
                best_checkpoint = ''
                best_acc = -1
                best_result = None
                for dd in trial_dirs:
                    try:
                        with open(os.path.join(log_dir, dd, valid_summary_file), 'r') as f:
                            result = json.load(f)
                        acc = result.get('valid_acc', -1)
                        checkpoint = os.path.join(log_dir, dd, _BEST_CHECKPOINT_FILE)
                        if acc > best_acc and os.path.isfile(checkpoint):
                            best_checkpoint = checkpoint
                            best_acc = acc
                            best_result = result
                    except Exception:
                        # best effort: an unreadable or malformed summary only
                        # disqualifies that one trial
                        logging.debug('Skipping trial summary in %s', dd, exc_info=True)
                if best_checkpoint:
                    # report the summary that belongs to the checkpoint being
                    # loaded, not whichever summary was read last
                    result = best_result
                    estimator = estimator_cls.load(best_checkpoint)
            if estimator is None:
                if wall_clock_tick < tic:
                    result.update({'traceback': 'timeout'})
                else:
                    # unknown error yet, try reproduce it
                    final_fit = False
        if not final_fit:
            # create independent log_dir for each trial
            trial_log_dir = os.path.join(log_dir, '.trial_{}'.format(task_id))
            args['log_dir'] = trial_log_dir
            custom_optimizer = args.pop('custom_optimizer', None)
            estimator = estimator_cls(args, problem_type=problem_type, reporter=reporter,
                                      net=custom_net, optimizer=custom_optimizer)
            # training
            result = estimator.fit(train_data=train_data, val_data=val_data,
                                   time_limit=wall_clock_tick - tic)
            with open(os.path.join(trial_log_dir, valid_summary_file), 'w') as f:
                json.dump(result, f)
            # save config and result
            if task is not None:
                trial_log = {}
                trial_log.update(args)
                trial_log.update(result)
                json_str = json.dumps(trial_log)
                time_str = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
                json_file_name = task + '_dataset-' + dataset + '_trials-' + str(num_trials) + '_' + time_str + '.json'
                with open(json_file_name, 'w') as json_file:
                    json_file.write(json_str)
                logging.info('Config and result in this trial have been saved to %s.', json_file_name)
    except Exception:
        # Report the failure to the caller as data; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate.
        import traceback
        return {'traceback': traceback.format_exc(), 'args': str(args),
                'time': time.time() - tic, 'train_acc': -1, 'valid_acc': -1}

    if estimator:
        result.update({'model_checkpoint': estimator})
        result.update({'estimator': estimator_cls})
    return result
def _train_image_classification(args, train_data, val_data, problem_type, wall_clock_tick, log_dir, reporter=None):
    """
    Run one image-classification trial, or reload the best trial for the final fit.

    Parameters
    ----------
    args: <class 'autogluon.utils.edict.EasyDict'>
        Trial configuration; copied before modification. ``task_id``,
        ``final_fit``, ``exp_batch_size``, ``custom_net``, ``model`` and
        ``custom_optimizer`` are read when present.
    train_data, val_data
        Forwarded to ``estimator.fit``.
    problem_type
        Forwarded to the estimator constructor.
    wall_clock_tick
        Deadline compared against ``time.time()``; also used to derive the
        ``time_limit`` passed to ``estimator.fit``.
    log_dir
        Directory holding one ``.trial_<task_id>`` sub-directory per trial.
    reporter
        Forwarded to the estimator constructor.

    Returns
    -------
    dict
        * If the deadline has already passed (and this is not the final fit),
          or any exception is raised while training or loading: a dict with
          ``traceback``, ``args``, ``time``, ``train_acc`` = -1 and
          ``valid_acc`` = -1.
        * Otherwise the result of ``estimator.fit`` (or, for the final fit,
          the stored summary of the best trial), extended with
          ``model_checkpoint`` and ``estimator`` when an estimator exists.

    Raises
    ------
    ValueError
        If ``model`` is given but is in no available model zoo.
    AssertionError
        If neither ``custom_net`` nor ``model`` selects a backend.
    """
    tic = time.time()
    args = args.copy()
    try:
        task_id = int(args['task_id'])
    except (KeyError, TypeError, ValueError):
        # missing or non-numeric task id: fall back to trial 0
        task_id = 0
    final_fit = args.pop('final_fit', False)
    # exponential batch size for Int() space batch sizes
    exp_batch_size = args.pop('exp_batch_size', False)
    if exp_batch_size and 'batch_size' in args:
        args['batch_size'] = 2 ** args['batch_size']

    # mxnet and torch dispatcher
    dispatcher = None
    torch_model_list = None
    mxnet_model_list = None
    custom_net = None
    if args.get('custom_net', None):
        custom_net = args.get('custom_net')
        if torch and timm:
            if isinstance(custom_net, torch.nn.Module):
                dispatcher = 'torch'
        if mx:
            if isinstance(custom_net, mx.gluon.Block):
                dispatcher = 'mxnet'
    else:
        if torch and timm:
            torch_model_list = timm.list_models()
        if mx:
            mxnet_model_list = list(get_model_list())
        model = args.get('model', None)
        if model:
            # timm model has higher priority
            if torch_model_list and model in torch_model_list:
                dispatcher = 'torch'
            elif mxnet_model_list and model in mxnet_model_list:
                dispatcher = 'mxnet'
            else:
                if not torch_model_list:
                    raise ValueError('Model not found in gluoncv model zoo. Install torch and timm if it supports the model.')
                elif not mxnet_model_list:
                    raise ValueError('Model not found in timm model zoo. Install mxnet if it supports the model.')
                else:
                    raise ValueError('Model not supported because it does not exist in both timm and gluoncv model zoo.')
    assert dispatcher in ('torch', 'mxnet'), 'custom net needs to be of type either torch.nn.Module or mx.gluon.Block'
    if dispatcher == 'mxnet':
        # level 30 is logging.WARNING
        logger.log(30, '=============================================================================\n'
                       'WARNING: Using MXNet models in ImagePredictor is deprecated as of v0.4.0 and may contain various bugs and issues!\n'
                       'In v0.6.0, ImagePredictor will no longer support training MXNet models. Please consider switching to specifying Torch models instead.\n'
                       'Users should ensure they update their code that depends on ImagePredictor when upgrading to future AutoGluon releases.\n'
                       'For more information, refer to this GitHub issue: https://github.com/awslabs/autogluon/issues/1560\n'
                       '=============================================================================\n')
    args['estimator'] = TorchImageClassificationEstimator if dispatcher == 'torch' else ImageClassificationEstimator
    # convert user defined config to nested form
    args = config_to_nested(args)

    if wall_clock_tick < tic and not final_fit:
        return {'traceback': 'timeout', 'args': str(args),
                'time': 0, 'train_acc': -1, 'valid_acc': -1}

    try:
        valid_summary_file = 'fit_summary_img_cls.ag'
        estimator_cls = args.pop('estimator', None)
        assert estimator_cls in (ImageClassificationEstimator, TorchImageClassificationEstimator)
        if final_fit:
            # load from previous dumps
            estimator = None
            # Bound up front so the timeout branch below can always update it,
            # even when log_dir does not exist.
            result = {}
            if os.path.isdir(log_dir):
                trial_dirs = [d for d in os.listdir(log_dir)
                              if d.startswith('.trial_') and os.path.isdir(os.path.join(log_dir, d))]
                best_checkpoint = ''
                best_acc = -1
                best_result = None
                for dd in trial_dirs:
                    try:
                        with open(os.path.join(log_dir, dd, valid_summary_file), 'r') as f:
                            result = json.load(f)
                        acc = result.get('valid_acc', -1)
                        checkpoint = os.path.join(log_dir, dd, _BEST_CHECKPOINT_FILE)
                        if acc > best_acc and os.path.isfile(checkpoint):
                            best_checkpoint = checkpoint
                            best_acc = acc
                            best_result = result
                    except Exception:
                        # best effort: an unreadable or malformed summary only
                        # disqualifies that one trial
                        logger.debug('Skipping trial summary in %s', dd, exc_info=True)
                if best_checkpoint:
                    # report the summary that belongs to the checkpoint being
                    # loaded, not whichever summary was read last
                    result = best_result
                    estimator = estimator_cls.load(best_checkpoint)
            if estimator is None:
                if wall_clock_tick < tic:
                    result.update({'traceback': 'timeout'})
                else:
                    # unknown error yet, try reproduce it
                    final_fit = False
        if not final_fit:
            # create independent log_dir for each trial
            trial_log_dir = os.path.join(log_dir, '.trial_{}'.format(task_id))
            args['log_dir'] = trial_log_dir
            custom_optimizer = args.pop('custom_optimizer', None)
            estimator = estimator_cls(args, problem_type=problem_type, reporter=reporter,
                                      net=custom_net, optimizer=custom_optimizer)
            # training
            result = estimator.fit(train_data=train_data, val_data=val_data,
                                   time_limit=wall_clock_tick - tic)
            with open(os.path.join(trial_log_dir, valid_summary_file), 'w') as f:
                json.dump(result, f)
    except Exception:
        # Report the failure to the caller as data; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate.
        import traceback
        return {'traceback': traceback.format_exc(), 'args': str(args),
                'time': time.time() - tic, 'train_acc': -1, 'valid_acc': -1}

    if estimator:
        result.update({'model_checkpoint': estimator})
        result.update({'estimator': estimator_cls})
    return result
import os

from gluoncv.model_zoo import get_model, get_model_list

# Directory handed to get_model as `root` for every model.
root = '/home/public_data/min.du/mxnet_models'
# exist_ok=True replaces the separate exists() check, which could race with
# another process creating the directory between the check and makedirs().
os.makedirs(root, exist_ok=True)

# Request the pretrained variant of every model the zoo lists.
for name in get_model_list():
    get_model(name, pretrained=True, root=root)