def train_proxy(opt):
    """Train a ProxyNet (proxy-based metric learning) model.

    Builds the backbone feature extractor, the train/validation data loaders
    and the proxy head, then performs ``opt.number_of_runs`` independent
    training runs (re-initializing the network before each) and logs the
    averaged validation results.

    Parameters
    ----------
    opt : argparse.Namespace-like options object. Fields read here include
        seed, gpus, batch_size, model, dataset, data_path, use_crops,
        iteration_per_epoch, loss, batch_k, num_workers, embed_dim, lr,
        lr_proxynca, lr_embedding, lr_inception, number_of_runs,
        disable_hybridize. ``opt.lr`` may be mutated when it is None.

    Raises
    ------
    RuntimeError / NotImplementedError if ``opt.model`` is not one of
    'inception-bn' or 'resnet50_v2'.
    """
    logging.info(opt)

    # Set random seed for reproducibility (both MXNet and NumPy RNGs).
    mx.random.seed(opt.seed)
    np.random.seed(opt.seed)

    # Setup computation context (list of devices).
    context = get_context(opt.gpus, logging)
    run_results = []

    # Adjust batch size to each compute context.
    batch_size = opt.batch_size * len(context)

    # Prepare feature extractor.
    if opt.model == 'inception-bn':
        feature_net, feature_params = get_feature_model(opt.model, ctx=context)
        data_shape = 224
        scale_image_data = False
    elif opt.model == 'resnet50_v2':
        feature_net = mx.gluon.model_zoo.vision.resnet50_v2(
            pretrained=True, ctx=context).features
        data_shape = 224
        scale_image_data = True
    else:
        raise RuntimeError('Unsupported model: %s' % opt.model)

    # Prepare datasets (with_proxy=True so samples carry proxy/class labels).
    train_dataset, val_dataset = get_dataset(
        opt.dataset, opt.data_path, data_shape=data_shape,
        use_crops=opt.use_crops, use_aug=True, with_proxy=True,
        scale_image_data=scale_image_data)
    logging.info('Training with %d classes, validating with %d classes'
                 % (train_dataset.num_classes(), val_dataset.num_classes()))

    if opt.iteration_per_epoch > 0:
        # Class-balanced sampling through a custom iterator wrapped so the
        # DataLoader yields one pre-batched sample per iteration.
        train_dataset, _ = get_dataset_iterator(
            opt.dataset, opt.data_path,
            batch_k=(opt.batch_size // 3) if opt.loss == 'xentropy' else opt.batch_k,
            batch_size=opt.batch_size, data_shape=data_shape,
            use_crops=opt.use_crops, scale_image_data=scale_image_data,
            batchify=False)
        train_dataloader = mx.gluon.data.DataLoader(
            DatasetIterator(train_dataset, opt.iteration_per_epoch,
                            'next_proxy_sample',
                            call_params={
                                'sampled_classes': (opt.batch_size // opt.batch_k)
                                                   if (opt.batch_k is not None) else None,
                                'chose_classes_randomly': True,
                            }),
            batch_size=1, shuffle=False, num_workers=opt.num_workers,
            last_batch='keep')
    else:
        train_dataloader = mx.gluon.data.DataLoader(
            train_dataset, batch_size=batch_size, shuffle=True,
            num_workers=opt.num_workers, last_batch='rollover')
    val_dataloader = mx.gluon.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=opt.num_workers, last_batch='keep')

    # Prepare proxy model.
    net = ProxyNet(feature_net, opt.embed_dim,
                   num_classes=train_dataset.num_classes())

    if opt.lr is None:
        # Derive a single base LR and express the three component LRs as
        # per-parameter multipliers relative to it.
        logging.info('Using variable learning rate')
        opt.lr = max([opt.lr_proxynca, opt.lr_embedding, opt.lr_inception])
        for p, v in net.encoder.collect_params().items():
            v.lr_mult = opt.lr_embedding / opt.lr
        for p, v in net.base_net.collect_params().items():
            v.lr_mult = opt.lr_inception / opt.lr
        for p, v in net.proxies.collect_params().items():
            v.lr_mult = opt.lr_proxynca / opt.lr
    else:
        logging.info('Using single learning rate: %f' % opt.lr)

    for run in range(1, opt.number_of_runs + 1):
        logging.info('Starting run %d/%d' % (run, opt.number_of_runs))
        # Reset networks to pretrained weights before each run.
        if opt.model == 'inception-bn':
            net.base_net.collect_params().load(feature_params, ctx=context,
                                               ignore_extra=True)
            if opt.dataset == 'CUB':
                # Freeze batch-norm parameters for CUB.
                # BUG FIX: the original tested `v.name in ['batchnorm', 'bn_']`
                # (exact equality against the two literals), which never matches
                # a real parameter name. Use substring matching, consistent with
                # train_rankedlist.
                for v in net.base_net.collect_params().values():
                    if 'batchnorm' in v.name or 'bn_' in v.name:
                        v.grad_req = 'null'
        elif opt.model == 'resnet50_v2':
            logging.info('Lowering LR for Resnet backbone')
            net.base_net = mx.gluon.model_zoo.vision.resnet50_v2(
                pretrained=True, ctx=context).features
            # Use a smaller learning rate for pre-trained convolutional layers.
            for v in net.base_net.collect_params().values():
                if 'conv' in v.name:
                    setattr(v, 'lr_mult', 0.01)
        else:
            raise NotImplementedError('Unknown model: %s' % opt.model)

        if opt.loss == 'triplet':
            net.encoder.initialize(mx.init.Xavier(magnitude=0.2), ctx=context,
                                   force_reinit=True)
            net.proxies.initialize(mx.init.Xavier(magnitude=0.2), ctx=context,
                                   force_reinit=True)
        else:
            net.init(TruncNorm(stdev=0.001), ctx=context, init_basenet=False)

        if not opt.disable_hybridize:
            net.hybridize()

        run_result = train(net, opt, train_dataloader, val_dataloader,
                           context, run)
        run_results.append(run_result)
        logging.info('Run %d finished with %f' % (run, run_result[0][1]))

    logging.info('Average validation of %d runs:\n%s'
                 % (opt.number_of_runs,
                    format_results(average_results(run_results))))
def train_normproxy(opt):
    """Train a NormProxyNet model over ``opt.number_of_runs`` independent runs.

    Builds the backbone, datasets/loaders and the normalized-proxy head, then
    for each run reloads pretrained backbone weights, (re-)initializes or
    resumes the network, trains, and finally logs averaged validation results.

    Raises RuntimeError / NotImplementedError for unsupported ``opt.model``.
    """
    logging.info(opt)
    # Set random seed (both MXNet and NumPy RNGs) for reproducibility.
    mx.random.seed(opt.seed)
    np.random.seed(opt.seed)
    # Setup computation context (list of devices).
    context = get_context(opt.gpus, logging)
    # Adjust batch size to each compute context.
    batch_size = opt.batch_size * len(context)
    run_results = []
    # Prepare feature extractor; feature_size is the backbone's output width
    # consumed by NormProxyNet.
    if opt.model == 'inception-bn':
        feature_net, feature_params = get_feature_model(opt.model, ctx=context)
        feature_net.collect_params().load(feature_params, ctx=context,
                                          ignore_extra=True)
        data_shape = 224
        scale_image_data = False
        feature_size = 1024
    elif opt.model == 'resnet50_v2':
        feature_params = None
        feature_net = mx.gluon.model_zoo.vision.resnet50_v2(
            pretrained=True, ctx=context).features
        data_shape = 224
        scale_image_data = True
        feature_size = 2048
    else:
        raise RuntimeError('Unsupported model: %s' % opt.model)
    # Prepare datasets (with_proxy=True so samples carry proxy/class labels).
    train_dataset, val_dataset = get_dataset(
        opt.dataset, opt.data_path, data_shape=data_shape,
        use_crops=opt.use_crops, use_aug=True, with_proxy=True,
        scale_image_data=scale_image_data)
    logging.info(
        'Training with %d classes, validating with %d classes' % (
            train_dataset.num_classes(), val_dataset.num_classes()))
    if opt.batch_k > 0:
        # Class-balanced batch iterator is used directly as the train loader.
        train_dataset, _ = get_dataset_iterator(
            opt.dataset, opt.data_path, batch_k=opt.batch_k,
            batch_size=batch_size, data_shape=data_shape,
            use_crops=opt.use_crops, scale_image_data=scale_image_data)
        train_dataloader = train_dataset
    else:
        train_dataloader = mx.gluon.data.DataLoader(
            train_dataset, batch_size=batch_size, shuffle=True,
            num_workers=opt.num_workers, last_batch='rollover')
    val_dataloader = mx.gluon.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False,
        num_workers=opt.num_workers, last_batch='keep')
    # Captured before the run loop because train_dataset may be an iterator.
    num_train_classes = train_dataset.num_classes()
    # Prepare proxy model.
    net = NormProxyNet(feature_net, opt.embed_dim,
                       num_classes=train_dataset.num_classes(),
                       feature_size=feature_size, no_fc=opt.no_fc,
                       dropout=opt.dropout,
                       static_proxies=opt.static_proxies)
    # Main run loop for multiple training runs.
    for run in range(1, opt.number_of_runs + 1):
        logging.info('Starting run %d/%d' % (run, opt.number_of_runs))
        # Reset networks to pretrained backbone weights before each run.
        if opt.model == 'inception-bn':
            net.base_net.collect_params().load(feature_params, ctx=context,
                                               ignore_extra=True)
        elif opt.model == 'resnet50_v2':
            net.base_net = mx.gluon.model_zoo.vision.resnet50_v2(
                pretrained=True, ctx=context).features
            # Use a smaller learning rate for pre-trained convolutional layers.
            logging.info('Lowering LR for Resnet backbone by 100x')
            for v in net.base_net.collect_params().values():
                if 'conv' in v.name:
                    setattr(v, 'lr_mult', 0.01)
        else:
            raise NotImplementedError('Unknown model: %s' % opt.model)
        if opt.start_epoch != 1:
            # Resuming: load previously saved parameters from the fixed
            # checkpoint file instead of re-initializing.
            param_file = 'normproxy_model.params'
            logging.info('Loading parameters from %s' % param_file)
            net.load_parameters(param_file, ctx=context)
        else:
            if opt.model == 'resnet50_v2':
                net.init(mx.init.Xavier(magnitude=2), ctx=context,
                         init_basenet=False)
            else:
                net.init(TruncNorm(stdev=0.001), ctx=context,
                         init_basenet=False)
        if not opt.disable_hybridize:
            net.hybridize()
        run_result = train(net, opt, train_dataloader, val_dataloader,
                           num_train_classes, context, run)
        run_results.append(run_result)
        logging.info('Run %d finished with %f' % (run, run_result[0][1]))
    logging.info(
        'Average validation of %d runs:\n%s' % (
            opt.number_of_runs,
            format_results(average_results(run_results))))
def train_clusterloss(opt):
    """Run cluster-loss training.

    Constructs the backbone and the L2-normalized embedding net, prepares the
    train/validation loaders, then executes ``opt.number_of_runs`` independent
    training runs (re-initializing the network before each) and logs the
    averaged validation results.
    """
    logging.info(opt)

    # Seed both RNGs so runs are reproducible.
    mx.random.seed(opt.seed)
    np.random.seed(opt.seed)

    # Devices to train on.
    ctx = get_context(opt.gpus, logging)
    all_results = []

    # Backbone feature extractor.
    if opt.model == 'inception-bn':
        feature_net, feature_params = get_feature_model(opt.model, ctx=ctx)
        feature_net.collect_params().load(feature_params, ctx=ctx,
                                          ignore_extra=True)
        data_shape, scale_image_data = 224, False
    elif opt.model == 'resnet50_v2':
        feature_net = mx.gluon.model_zoo.vision.resnet50_v2(
            pretrained=True, ctx=ctx).features
        data_shape, scale_image_data = 224, True
    else:
        raise RuntimeError('Unsupported model: %s' % opt.model)

    net = EmbeddingNet(feature_net, opt.embed_dim, normalize=True)

    if opt.model == 'resnet50_v2':
        # Use a smaller learning rate for pre-trained convolutional layers.
        logging.info('Using smaller conv learning rates')
        for param in net.base_net.collect_params().values():
            if 'conv' in param.name:
                setattr(param, 'lr_mult', 0.01)

    # Datasets and loaders.
    train_dataset, val_dataset = get_dataset(
        opt.dataset, opt.data_path, data_shape=data_shape,
        use_crops=opt.use_crops, use_aug=True,
        scale_image_data=scale_image_data)
    logging.info('Training with %d classes, validating with %d classes'
                 % (train_dataset.num_classes(), val_dataset.num_classes()))

    if opt.iteration_per_epoch > 0:
        # Fixed number of iterations per epoch via the sampling iterator,
        # wrapped so the DataLoader yields one pre-built batch at a time.
        train_dataset, _ = get_dataset_iterator(
            opt.dataset, opt.data_path, batch_k=opt.batch_k,
            batch_size=opt.batch_size, batchify=False,
            data_shape=data_shape, use_crops=opt.use_crops,
            scale_image_data=scale_image_data)
        train_dataloader = mx.gluon.data.DataLoader(
            DatasetIterator(train_dataset, opt.iteration_per_epoch, 'next'),
            batch_size=1, shuffle=False, num_workers=opt.num_workers,
            last_batch='keep')
    else:
        train_dataloader = mx.gluon.data.DataLoader(
            train_dataset, batch_size=opt.batch_size, shuffle=True,
            num_workers=opt.num_workers, last_batch='rollover')

    val_dataloader = mx.gluon.data.DataLoader(
        val_dataset, batch_size=opt.batch_size, shuffle=False,
        num_workers=opt.num_workers, last_batch='keep')

    for run in range(1, opt.number_of_runs + 1):
        logging.info('Starting run %d/%d' % (run, opt.number_of_runs))

        # Re-initialize the embedding head, then restore pretrained backbone
        # weights so each run starts from the same point.
        net.init(mx.init.Xavier(magnitude=0.2), ctx=ctx, init_basenet=False)
        if opt.model == 'inception-bn':
            net.base_net.collect_params().load(feature_params, ctx=ctx,
                                               ignore_extra=True)
        elif opt.model == 'resnet50_v2':
            net.base_net = mx.gluon.model_zoo.vision.resnet50_v2(
                pretrained=True, ctx=ctx).features
        else:
            raise RuntimeError('Unsupported model: %s' % opt.model)

        if not opt.disable_hybridize:
            net.hybridize()

        outcome = train(net, opt, train_dataloader, val_dataloader, ctx, run)
        all_results.append(outcome)
        logging.info('Run %d finished with %f' % (run, outcome[0][1]))

    logging.info('Average validation of %d runs:\n%s'
                 % (opt.number_of_runs,
                    format_results(average_results(all_results))))
def train_rankedlist(opt): logging.info(opt) # Settings. mx.random.seed(opt.seed) np.random.seed(opt.seed) # Setup computation context context = get_context(opt.gpus, logging) run_results = [] # Get model if opt.model == 'inception-bn': feature_net, feature_params = get_feature_model(opt.model, ctx=context) feature_net.collect_params().load(feature_params, ctx=context, ignore_extra=True) data_shape = 224 scale_image_data = False elif opt.model == 'resnet50_v2': feature_net = mx.gluon.model_zoo.vision.resnet50_v2( pretrained=True, ctx=context).features data_shape = 224 scale_image_data = True feature_params = None else: raise RuntimeError('Unsupported model: %s' % opt.model) if opt.bottleneck_layers != '': embedding_layers = [int(x) for x in opt.bottleneck_layers.split(',') ] + [opt.embed_dim] else: embedding_layers = [opt.embed_dim] logging.info('Embedding layers: [%s]' % ','.join([str(x) for x in embedding_layers])) if len(embedding_layers) == 1: embedding_layers = embedding_layers[0] net = EmbeddingNet(feature_net, embedding_layers, normalize=True, dropout=False) logging.info(net) if opt.model == 'resnet50_v2': # Use a smaller learning rate for pre-trained convolutional layers. for v in net.base_net.collect_params().values(): if 'conv' in v.name: setattr(v, 'lr_mult', 0.01) elif 'batchnorm' in v.name or 'bn_' in v.name: v.grad_req = 'null' else: for v in net.encoder.collect_params().values(): setattr(v, 'lr_mult', 10.) 
# Get data iterators train_dataset = DatasetIterator( get_dataset_iterator(opt.dataset, opt.data_path, batch_k=opt.batch_k, batch_size=opt.batch_size, batchify=False, data_shape=data_shape, use_crops=opt.use_crops, scale_image_data=scale_image_data)[0], opt.iteration_per_epoch, 'next') train_dataiterator = mx.gluon.data.DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=opt.num_workers, last_batch='keep') val_dataset = get_dataset(opt.dataset, opt.data_path, data_shape=data_shape, use_crops=opt.use_crops, use_aug=True, scale_image_data=scale_image_data)[1] logging.info( 'Training with %d classes, validating with %d classes' % (train_dataset.data_iterator.num_classes(), val_dataset.num_classes())) val_dataloader = mx.gluon.data.DataLoader(val_dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers, last_batch='keep') # main run loop for multiple training runs for run in range(1, opt.number_of_runs + 1): logging.info('Starting run %d/%d' % (run, opt.number_of_runs)) net.init(mx.init.Xavier(magnitude=0.2), ctx=context, init_basenet=False) if opt.model == 'inception-bn': net.base_net.collect_params().load(feature_params, ctx=context, ignore_extra=True) elif opt.model == 'resnet50_v2': net.base_net = mx.gluon.model_zoo.vision.resnet50_v2( pretrained=True, ctx=context).features else: raise RuntimeError('Unsupported model: %s' % opt.model) if not opt.disable_hybridize: net.hybridize() run_result = train(net, opt, train_dataiterator, val_dataloader, context, run) run_results.append(run_result) logging.info('Run %d finished with %f' % (run, run_result[0][1])) logging.info( 'Average validation of %d runs:\n%s' % (opt.number_of_runs, format_results(average_results(run_results))))
def train_margin(opt):
    """Run margin-based loss training over ``opt.number_of_runs`` runs.

    Builds a MarginNet on top of the chosen backbone, prepares a
    class-balanced train iterator and a validation loader, optionally learns
    a per-class beta margin, then trains repeatedly and logs the averaged
    validation results.

    Raises RuntimeError for unsupported ``opt.model``.
    """
    logging.info(opt)
    # Set random seed (both MXNet and NumPy RNGs) for reproducibility.
    mx.random.seed(opt.seed)
    np.random.seed(opt.seed)
    # Setup computation context (list of devices).
    context = get_context(opt.gpus, logging)
    # Adjust batch size to each compute context.
    batch_size = opt.batch_size * len(context)
    run_results = []
    # Get model (backbone feature extractor).
    if opt.model == 'inception-bn':
        feature_net, feature_params = get_feature_model(opt.model, ctx=context)
        feature_net.collect_params().load(feature_params, ctx=context,
                                          ignore_extra=True)
        data_shape = 224
        scale_image_data = False
    elif opt.model == 'resnet50_v2':
        feature_params = None
        feature_net = mx.gluon.model_zoo.vision.resnet50_v2(
            pretrained=True, ctx=context).features
        data_shape = 224
        scale_image_data = True
    else:
        raise RuntimeError('Unsupported model: %s' % opt.model)
    net = MarginNet(feature_net, opt.embed_dim)
    if opt.model == 'resnet50_v2':
        # Use a smaller learning rate for pre-trained convolutional layers.
        for v in net.base_net.collect_params().values():
            if 'conv' in v.name:
                setattr(v, 'lr_mult', 0.01)
    # Get data iterators; the train side is a class-balanced sampler wrapped
    # so the DataLoader yields one pre-built batch per iteration.
    train_dataset, val_dataset = get_dataset(
        opt.dataset, opt.data_path, data_shape=data_shape,
        use_crops=opt.use_crops, use_aug=True,
        scale_image_data=scale_image_data)
    train_dataiter, _ = get_dataset_iterator(
        opt.dataset, opt.data_path, batch_k=opt.batch_k,
        batch_size=batch_size, data_shape=data_shape,
        use_crops=opt.use_crops, scale_image_data=scale_image_data,
        batchify=False)
    train_dataloader = mx.gluon.data.DataLoader(
        DatasetIterator(train_dataiter, opt.iteration_per_epoch, 'next'),
        batch_size=1, shuffle=False, num_workers=opt.num_workers,
        last_batch='keep')
    # NOTE(review): validation uses opt.batch_size while training uses the
    # context-scaled batch_size — confirm this asymmetry is intended.
    val_dataloader = mx.gluon.data.DataLoader(
        val_dataset, batch_size=opt.batch_size, shuffle=False,
        num_workers=opt.num_workers, last_batch='keep')
    logging.info('Training with %d classes, validating with %d classes'
                 % (train_dataset.num_classes(), val_dataset.num_classes()))
    # Main run loop for multiple training runs.
    for run in range(1, opt.number_of_runs + 1):
        logging.info('Starting run %d/%d' % (run, opt.number_of_runs))
        # Re-init embedding layers and reload pretrained layers.
        if opt.model == 'inception-bn':
            net.init(mx.init.Xavier(magnitude=0.2), ctx=context,
                     init_basenet=False)
            net.base_net.collect_params().load(feature_params, ctx=context,
                                               ignore_extra=True)
        elif opt.model == 'resnet50_v2':
            net.init(mx.init.Xavier(magnitude=2), ctx=context,
                     init_basenet=False)
            net.base_net = mx.gluon.model_zoo.vision.resnet50_v2(
                pretrained=True, ctx=context).features
        else:
            raise RuntimeError('Unknown model type: %s' % opt.model)
        if not opt.disable_hybridize:
            net.hybridize()
        if opt.lr_beta > 0.0:
            # Learnable per-class beta margin (one scalar per class).
            logging.info('Learning beta margin')
            beta = mx.gluon.nn.Embedding(train_dataset.num_classes(), 1)
        else:
            # Fixed scalar beta.
            beta = opt.beta
        run_result = train(net, beta, opt, train_dataloader, val_dataloader,
                           batch_size, context, run)
        run_results.append(run_result)
        logging.info('Run %d finished with %f' % (run, run_result[0][1]))
    logging.info(
        'Average validation of %d runs:\n%s' % (
            opt.number_of_runs,
            format_results(average_results(run_results))))