def __init__(self, criterion, metric, device, optimizer_name="adam", lr_scheduler="sqrt", initial_lr=1e-3, epoch_size=1, embed_size=16, hidden_size=256, n_layers=2): super(Model, self).__init__() vocab_size = len(string.printable) self.net = RNN(vocab_size, embed_size, hidden_size, vocab_size, n_layers).to(device) self.criterion = criterion self.metric = metric self.device = device self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr) self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler, epoch_size)
def testExpDecaying(self):
    exp_decaying_lr_gamma = 0.66
    FLAGS = easydict.EasyDict({
        'lr_scheduler': 'exp_decaying',
        'epoch_warmup': 5,
        '_steps_per_epoch': 1,
        'lr': 0.256,
        'base_lr': 0.256,
        'exp_decay_epoch_interval': 2,
        'exp_decaying_lr_gamma': exp_decaying_lr_gamma,
        'lr_stepwise': False,
    })
    optimizer = self._setup(FLAGS.lr)
    lr_scheduler = optim.get_lr_scheduler(optimizer, FLAGS)
    res = self._step(optimizer, lr_scheduler, 21)
    self.assertEqual(res[-1], FLAGS.lr * FLAGS.exp_decaying_lr_gamma**10)

    optimizer = self._setup(FLAGS.lr)
    FLAGS.lr_scheduler = 'exp_decaying_trunc'
    lr_scheduler = optim.get_lr_scheduler(optimizer, FLAGS)
    res = self._step(optimizer, lr_scheduler, 122)
    self.assertEqual(res[-1], FLAGS.lr * 0.05)
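# A quick sanity check of the decay rule implied by the assertions above,
# ignoring warmup (a minimal sketch only; the real `optim.get_lr_scheduler`
# may implement the schedule differently):
def exp_decaying_lr(base_lr, gamma, interval, epoch):
    """lr(epoch) = base_lr * gamma ** (epoch // interval)."""
    return base_lr * gamma ** (epoch // interval)

# after ~21 epochs with a 2-epoch interval, the LR has been decayed 10 times
assert exp_decaying_lr(0.256, 0.66, 2, 20) == 0.256 * 0.66 ** 10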
def __init__(self, iterator, criterion, metric, device, optimizer_name="adam",
             lr_scheduler="sqrt", initial_lr=1e-3, epoch_size=1,
             embedding_dim=100, hidden_dim=256, output_dim=1, n_layers=2,
             bidirectional=True, dropout=0.5):
    """
    :param iterator: torchtext iterator whose dataset exposes the 'text' field and its vocab
    :param criterion: loss function
    :param metric: evaluation metric
    :param device: torch device the model is placed on
    :param optimizer_name: optimizer name forwarded to get_optimizer
    :param lr_scheduler: LR schedule name forwarded to get_lr_scheduler
    :param initial_lr: initial learning rate
    :param epoch_size: forwarded to get_lr_scheduler
    :param embedding_dim: dimensionality of the word embeddings
    :param hidden_dim: LSTM hidden size
    :param output_dim: number of model outputs
    :param n_layers: number of LSTM layers
    :param bidirectional: whether the LSTM is bidirectional
    :param dropout: dropout probability
    """
    super(Model, self).__init__()
    self.device = device
    self.criterion = criterion
    self.metric = metric

    text_field = iterator.dataset.fields['text']
    pad_idx = text_field.vocab.stoi[text_field.pad_token]
    unk_idx = text_field.vocab.stoi[text_field.unk_token]

    self.net = LSTM(vocab_size=len(text_field.vocab),
                    embedding_dim=embedding_dim,
                    hidden_dim=hidden_dim,
                    output_dim=output_dim,
                    n_layers=n_layers,
                    bidirectional=bidirectional,
                    dropout=dropout,
                    pad_idx=pad_idx).to(device)

    # initialize embeddings from the pretrained vectors attached to the vocab
    pretrained_embeddings = text_field.vocab.vectors
    self.net.embedding.weight.data.copy_(pretrained_embeddings)
    self.net.embedding.weight.data[unk_idx] = torch.zeros(embedding_dim).to(self.device)
    self.net.embedding.weight.data[pad_idx] = torch.zeros(embedding_dim).to(self.device)

    # freeze the embedding layer
    self.net.embedding.weight.requires_grad = False

    self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr)
    self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler,
                                         epoch_size)
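# Because the embedding layer is frozen above, only the remaining trainable
# parameters need to reach the optimizer. A minimal, self-contained
# illustration of that filtering idiom (hypothetical toy module; the project's
# own `get_optimizer` may already handle this internally):
import torch
import torch.nn as nn

class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(100, 16)
        self.fc = nn.Linear(16, 1)

net = TinyNet()
net.embedding.weight.requires_grad = False  # frozen, as in the snippet above
trainable = [p for p in net.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable, lr=1e-3)
print(len(trainable))  # only the Linear weight and bias remain trainable -> 2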
def train():
    logger = get_logger("./logger")
    writer = SummaryWriter("./temp.tb")

    train_loader, val_loader = None, None
    test_loader = None
    model = None
    criterion = None

    optimizer = get_optimizer(model)
    scheduler = get_lr_scheduler(optimizer)

    trainer = Trainer(criterion, optimizer, scheduler, logger, writer)
    trainer.train_loop(train_loader, val_loader, test_loader, model)
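# The skeleton above leaves the factories unspecified. A minimal sketch of
# what `get_optimizer` / `get_lr_scheduler` could look like for this call
# pattern (assumed defaults, not the project's actual implementation):
import torch

def get_optimizer(model, lr=1e-3):
    return torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9)

def get_lr_scheduler(optimizer, step_size=30, gamma=0.1):
    return torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size,
                                           gamma=gamma)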
def __init__(self, criterion, metric, device, optimizer_name="adam", lr_scheduler="sqrt", initial_lr=1e-3, epoch_size=1): super(Model, self).__init__() self.net = CNN().to(device) self.criterion = criterion self.metric = metric self.device = device self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr) self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler, epoch_size)
def __init__(self, criterion, metric, device, input_dimension, num_classes,
             optimizer_name="adam", lr_scheduler="cyclic", initial_lr=1e-3,
             epoch_size=1):
    super(Model, self).__init__()
    self.criterion = criterion
    self.metric = metric
    self.device = device
    self.net = LinearLayer(input_dimension, num_classes).to(self.device)
    self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr)
    self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler,
                                         epoch_size)
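# For the "cyclic" schedule above, a plain-PyTorch analogue is
# torch.optim.lr_scheduler.CyclicLR (a sketch with assumed hyper-parameters;
# `get_lr_scheduler` may wrap it differently). Note `cycle_momentum=False`
# is required when the optimizer is Adam, which has no `momentum` parameter.
import torch

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.Adam(params, lr=1e-3)
scheduler = torch.optim.lr_scheduler.CyclicLR(
    optimizer, base_lr=1e-4, max_lr=1e-3,
    step_size_up=2000, cycle_momentum=False)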
def __init__(self, criterion, metric, device, optimizer_name="adam", lr_scheduler="sqrt", initial_lr=1e-3, epoch_size=1, coeff=1): super(Model, self).__init__() self.net = resnet18(pretrained=True) self.net.fc = nn.Linear(self.net.fc.in_features, NUMBER_CLASSES) self.net = self.net.to(device) self.criterion = criterion self.metric = metric self.device = device self.coeff = coeff self.optimizer = get_optimizer(optimizer_name, self.net, initial_lr) self.lr_scheduler = get_lr_scheduler(self.optimizer, lr_scheduler, epoch_size)
def train_val_test(): """Train and val.""" torch.backends.cudnn.benchmark = True # model model, model_wrapper = mc.get_model() ema = mc.setup_ema(model) criterion = torch.nn.CrossEntropyLoss(reduction='none').cuda() criterion_smooth = optim.CrossEntropyLabelSmooth( FLAGS.model_kwparams['num_classes'], FLAGS['label_smoothing'], reduction='none').cuda() # TODO(meijieru): cal loss on all GPUs instead only `cuda:0` when non # distributed if FLAGS.get('log_graph_only', False): if udist.is_master(): _input = torch.zeros(1, 3, FLAGS.image_size, FLAGS.image_size).cuda() _input = _input.requires_grad_(True) mc.summary_writer.add_graph(model_wrapper, (_input, ), verbose=True) return # check pretrained if FLAGS.pretrained: checkpoint = torch.load(FLAGS.pretrained, map_location=lambda storage, loc: storage) if ema: ema.load_state_dict(checkpoint['ema']) ema.to(get_device(model)) # update keys from external models if isinstance(checkpoint, dict) and 'model' in checkpoint: checkpoint = checkpoint['model'] if (hasattr(FLAGS, 'pretrained_model_remap_keys') and FLAGS.pretrained_model_remap_keys): new_checkpoint = {} new_keys = list(model_wrapper.state_dict().keys()) old_keys = list(checkpoint.keys()) for key_new, key_old in zip(new_keys, old_keys): new_checkpoint[key_new] = checkpoint[key_old] logging.info('remap {} to {}'.format(key_new, key_old)) checkpoint = new_checkpoint model_wrapper.load_state_dict(checkpoint) logging.info('Loaded model {}.'.format(FLAGS.pretrained)) optimizer = optim.get_optimizer(model_wrapper, FLAGS) # check resume training if FLAGS.resume: checkpoint = torch.load(os.path.join(FLAGS.resume, 'latest_checkpoint.pt'), map_location=lambda storage, loc: storage) model_wrapper.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) if ema: ema.load_state_dict(checkpoint['ema']) ema.to(get_device(model)) last_epoch = checkpoint['last_epoch'] lr_scheduler = optim.get_lr_scheduler(optimizer, FLAGS) lr_scheduler.last_epoch = (last_epoch + 1) * FLAGS._steps_per_epoch best_val = extract_item(checkpoint['best_val']) train_meters, val_meters = checkpoint['meters'] FLAGS._global_step = (last_epoch + 1) * FLAGS._steps_per_epoch if udist.is_master(): logging.info('Loaded checkpoint {} at epoch {}.'.format( FLAGS.resume, last_epoch)) else: lr_scheduler = optim.get_lr_scheduler(optimizer, FLAGS) # last_epoch = lr_scheduler.last_epoch last_epoch = -1 best_val = 1. 
train_meters = mc.get_meters('train') val_meters = mc.get_meters('val') FLAGS._global_step = 0 if not FLAGS.resume and udist.is_master(): logging.info(model_wrapper) if FLAGS.profiling: if 'gpu' in FLAGS.profiling: mc.profiling(model, use_cuda=True) if 'cpu' in FLAGS.profiling: mc.profiling(model, use_cuda=False) # data (train_transforms, val_transforms, test_transforms) = dataflow.data_transforms(FLAGS) (train_set, val_set, test_set) = dataflow.dataset(train_transforms, val_transforms, test_transforms, FLAGS) (train_loader, calib_loader, val_loader, test_loader) = dataflow.data_loader(train_set, val_set, test_set, FLAGS) if FLAGS.test_only and (test_loader is not None): if udist.is_master(): logging.info('Start testing.') test_meters = mc.get_meters('test') validate(last_epoch, calib_loader, test_loader, criterion, test_meters, model_wrapper, ema, 'test') return # already broadcast by AllReduceDistributedDataParallel # optimizer load same checkpoint/same initialization if udist.is_master(): logging.info('Start training.') for epoch in range(last_epoch + 1, FLAGS.num_epochs): # train results = run_one_epoch(epoch, train_loader, model_wrapper, criterion_smooth, optimizer, lr_scheduler, ema, train_meters, phase='train') # val results = validate(epoch, calib_loader, val_loader, criterion, val_meters, model_wrapper, ema, 'val') if results['top1_error'] < best_val: best_val = results['top1_error'] if udist.is_master(): save_status(model_wrapper, optimizer, ema, epoch, best_val, (train_meters, val_meters), os.path.join(FLAGS.log_dir, 'best_model.pt')) logging.info( 'New best validation top1 error: {:.4f}'.format(best_val)) if udist.is_master(): # save latest checkpoint save_status(model_wrapper, optimizer, ema, epoch, best_val, (train_meters, val_meters), os.path.join(FLAGS.log_dir, 'latest_checkpoint.pt')) wandb.log( { "Validation Accuracy": 1. - results['top1_error'], "Best Validation Accuracy": 1. - best_val }, step=epoch) # NOTE(meijieru): from scheduler code, should be called after train/val # use stepwise scheduler instead # lr_scheduler.step() return
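# The resume path above rebuilds the scheduler and then overwrites
# `lr_scheduler.last_epoch` manually. PyTorch schedulers also support
# checkpointing directly via state_dict()/load_state_dict(); a minimal
# alternative sketch (not what this code does):
import torch

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.SGD(params, lr=0.1)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)

ckpt = {'optimizer': optimizer.state_dict(),
        'lr_scheduler': scheduler.state_dict()}
# ... later, on resume ...
optimizer.load_state_dict(ckpt['optimizer'])
scheduler.load_state_dict(ckpt['lr_scheduler'])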
def train_val_test(): """Train and val.""" torch.backends.cudnn.benchmark = True # For acceleration # model model, model_wrapper = mc.get_model() ema = mc.setup_ema(model) criterion = torch.nn.CrossEntropyLoss(reduction='mean').cuda() criterion_smooth = optim.CrossEntropyLabelSmooth( FLAGS.model_kwparams['num_classes'], FLAGS['label_smoothing'], reduction='mean').cuda() if model.task == 'segmentation': criterion = CrossEntropyLoss().cuda() criterion_smooth = CrossEntropyLoss().cuda() if FLAGS.dataset == 'coco': criterion = JointsMSELoss(use_target_weight=True).cuda() criterion_smooth = JointsMSELoss(use_target_weight=True).cuda() if FLAGS.get('log_graph_only', False): if udist.is_master(): _input = torch.zeros(1, 3, FLAGS.image_size, FLAGS.image_size).cuda() _input = _input.requires_grad_(True) if isinstance(model_wrapper, (torch.nn.DataParallel, udist.AllReduceDistributedDataParallel)): mc.summary_writer.add_graph(model_wrapper.module, (_input, ), verbose=True) else: mc.summary_writer.add_graph(model_wrapper, (_input, ), verbose=True) return # check pretrained if FLAGS.pretrained: checkpoint = torch.load(FLAGS.pretrained, map_location=lambda storage, loc: storage) if ema: ema.load_state_dict(checkpoint['ema']) ema.to(get_device(model)) # update keys from external models if isinstance(checkpoint, dict) and 'model' in checkpoint: checkpoint = checkpoint['model'] if (hasattr(FLAGS, 'pretrained_model_remap_keys') and FLAGS.pretrained_model_remap_keys): new_checkpoint = {} new_keys = list(model_wrapper.state_dict().keys()) old_keys = list(checkpoint.keys()) for key_new, key_old in zip(new_keys, old_keys): new_checkpoint[key_new] = checkpoint[key_old] if udist.is_master(): logging.info('remap {} to {}'.format(key_new, key_old)) checkpoint = new_checkpoint model_wrapper.load_state_dict(checkpoint) if udist.is_master(): logging.info('Loaded model {}.'.format(FLAGS.pretrained)) optimizer = optim.get_optimizer(model_wrapper, FLAGS) # check resume training if FLAGS.resume: checkpoint = torch.load(os.path.join(FLAGS.resume, 'latest_checkpoint.pt'), map_location=lambda storage, loc: storage) model_wrapper = checkpoint['model'].cuda() model = model_wrapper.module # model = checkpoint['model'].module optimizer = checkpoint['optimizer'] for state in optimizer.state.values(): for k, v in state.items(): if isinstance(v, torch.Tensor): state[k] = v.cuda() # model_wrapper.load_state_dict(checkpoint['model']) # optimizer.load_state_dict(checkpoint['optimizer']) if ema: # ema.load_state_dict(checkpoint['ema']) ema = checkpoint['ema'].cuda() ema.to(get_device(model)) last_epoch = checkpoint['last_epoch'] lr_scheduler = optim.get_lr_scheduler(optimizer, FLAGS, last_epoch=(last_epoch + 1) * FLAGS._steps_per_epoch) lr_scheduler.last_epoch = (last_epoch + 1) * FLAGS._steps_per_epoch best_val = extract_item(checkpoint['best_val']) train_meters, val_meters = checkpoint['meters'] FLAGS._global_step = (last_epoch + 1) * FLAGS._steps_per_epoch if udist.is_master(): logging.info('Loaded checkpoint {} at epoch {}.'.format( FLAGS.resume, last_epoch)) else: lr_scheduler = optim.get_lr_scheduler(optimizer, FLAGS) # last_epoch = lr_scheduler.last_epoch last_epoch = -1 best_val = 1. if not FLAGS.distill: train_meters = mc.get_meters('train', FLAGS.prune_params['method']) val_meters = mc.get_meters('val') else: train_meters = mc.get_distill_meters('train', FLAGS.prune_params['method']) val_meters = mc.get_distill_meters('val') if FLAGS.model_kwparams.task == 'segmentation': best_val = 0. 
if not FLAGS.distill: train_meters = mc.get_seg_meters('train', FLAGS.prune_params['method']) val_meters = mc.get_seg_meters('val') else: train_meters = mc.get_seg_distill_meters( 'train', FLAGS.prune_params['method']) val_meters = mc.get_seg_distill_meters('val') FLAGS._global_step = 0 if not FLAGS.resume and udist.is_master(): logging.info(model_wrapper) assert FLAGS.profiling, '`m.macs` is used for calculating penalty' # if udist.is_master(): # model.apply(lambda m: print(m)) if FLAGS.profiling: if 'gpu' in FLAGS.profiling: mc.profiling(model, use_cuda=True) if 'cpu' in FLAGS.profiling: mc.profiling(model, use_cuda=False) if FLAGS.dataset == 'cityscapes': (train_set, val_set, test_set) = seg_dataflow.cityscapes_datasets(FLAGS) segval = SegVal(num_classes=19) elif FLAGS.dataset == 'ade20k': (train_set, val_set, test_set) = seg_dataflow.ade20k_datasets(FLAGS) segval = SegVal(num_classes=150) elif FLAGS.dataset == 'coco': (train_set, val_set, test_set) = seg_dataflow.coco_datasets(FLAGS) # print(len(train_set), len(val_set)) # 149813 104125 segval = None else: # data (train_transforms, val_transforms, test_transforms) = dataflow.data_transforms(FLAGS) (train_set, val_set, test_set) = dataflow.dataset(train_transforms, val_transforms, test_transforms, FLAGS) segval = None (train_loader, calib_loader, val_loader, test_loader) = dataflow.data_loader(train_set, val_set, test_set, FLAGS) # get bn's weights if FLAGS.prune_params.use_transformer: FLAGS._bn_to_prune, FLAGS._bn_to_prune_transformer = prune.get_bn_to_prune( model, FLAGS.prune_params) else: FLAGS._bn_to_prune = prune.get_bn_to_prune(model, FLAGS.prune_params) rho_scheduler = prune.get_rho_scheduler(FLAGS.prune_params, FLAGS._steps_per_epoch) if FLAGS.test_only and (test_loader is not None): if udist.is_master(): logging.info('Start testing.') test_meters = mc.get_meters('test') validate(last_epoch, calib_loader, test_loader, criterion, test_meters, model_wrapper, ema, 'test') return # already broadcast by AllReduceDistributedDataParallel # optimizer load same checkpoint/same initialization if udist.is_master(): logging.info('Start training.') for epoch in range(last_epoch + 1, FLAGS.num_epochs): # train results = run_one_epoch(epoch, train_loader, model_wrapper, criterion_smooth, optimizer, lr_scheduler, ema, rho_scheduler, train_meters, phase='train') if (epoch + 1) % FLAGS.eval_interval == 0: # val results, model_eval_wrapper = validate(epoch, calib_loader, val_loader, criterion, val_meters, model_wrapper, ema, 'val', segval, val_set) if FLAGS.prune_params['method'] is not None and FLAGS.prune_params[ 'bn_prune_filter'] is not None: prune_threshold = FLAGS.model_shrink_threshold # 1e-3 masks = prune.cal_mask_network_slimming_by_threshold( get_prune_weights(model_eval_wrapper), prune_threshold ) # get mask for all bn weights (depth-wise) FLAGS._bn_to_prune.add_info_list('mask', masks) flops_pruned, infos = prune.cal_pruned_flops( FLAGS._bn_to_prune) log_pruned_info(mc.unwrap_model(model_eval_wrapper), flops_pruned, infos, prune_threshold) if not FLAGS.distill: if flops_pruned >= FLAGS.model_shrink_delta_flops \ or epoch == FLAGS.num_epochs - 1: ema_only = (epoch == FLAGS.num_epochs - 1) shrink_model(model_wrapper, ema, optimizer, FLAGS._bn_to_prune, prune_threshold, ema_only) model_kwparams = mb.output_network(mc.unwrap_model(model_wrapper)) if udist.is_master(): if FLAGS.model_kwparams.task == 'classification' and results[ 'top1_error'] < best_val: best_val = results['top1_error'] logging.info( 'New best validation top1 error: 
{:.4f}'.format( best_val)) save_status(model_wrapper, model_kwparams, optimizer, ema, epoch, best_val, (train_meters, val_meters), os.path.join(FLAGS.log_dir, 'best_model')) elif FLAGS.model_kwparams.task == 'segmentation' and FLAGS.dataset != 'coco' and results[ 'mIoU'] > best_val: best_val = results['mIoU'] logging.info('New seg mIoU: {:.4f}'.format(best_val)) save_status(model_wrapper, model_kwparams, optimizer, ema, epoch, best_val, (train_meters, val_meters), os.path.join(FLAGS.log_dir, 'best_model')) elif FLAGS.dataset == 'coco' and results > best_val: best_val = results logging.info('New Result: {:.4f}'.format(best_val)) save_status(model_wrapper, model_kwparams, optimizer, ema, epoch, best_val, (train_meters, val_meters), os.path.join(FLAGS.log_dir, 'best_model')) # save latest checkpoint save_status(model_wrapper, model_kwparams, optimizer, ema, epoch, best_val, (train_meters, val_meters), os.path.join(FLAGS.log_dir, 'latest_checkpoint')) return
def train_val_test(): """Train and val.""" torch.backends.cudnn.benchmark = True # model model, model_wrapper = get_model() criterion = torch.nn.CrossEntropyLoss(reduction='none').cuda() criterion_smooth = optim.CrossEntropyLabelSmooth( FLAGS.model_kwparams['num_classes'], FLAGS['label_smoothing'], reduction='none').cuda() # TODO: cal loss on all GPUs instead only `cuda:0` when non # distributed ema = None if FLAGS.moving_average_decay > 0.0: if FLAGS.moving_average_decay_adjust: moving_average_decay = optim.ExponentialMovingAverage.adjust_momentum( FLAGS.moving_average_decay, FLAGS.moving_average_decay_base_batch / FLAGS.batch_size) else: moving_average_decay = FLAGS.moving_average_decay logging.info('Moving average for model parameters: {}'.format( moving_average_decay)) ema = optim.ExponentialMovingAverage(moving_average_decay) for name, param in model.named_parameters(): ema.register(name, param) # We maintain mva for batch norm moving mean and variance as well. for name, buffer in model.named_buffers(): if 'running_var' in name or 'running_mean' in name: ema.register(name, buffer) if FLAGS.get('log_graph_only', False): if is_root_rank: _input = torch.zeros(1, 3, FLAGS.image_size, FLAGS.image_size).cuda() _input = _input.requires_grad_(True) summary_writer.add_graph(model_wrapper, (_input, ), verbose=True) return # check pretrained if FLAGS.pretrained: checkpoint = torch.load(FLAGS.pretrained, map_location=lambda storage, loc: storage) if ema: ema.load_state_dict(checkpoint['ema']) ema.to(get_device(model)) # update keys from external models if isinstance(checkpoint, dict) and 'model' in checkpoint: checkpoint = checkpoint['model'] if (hasattr(FLAGS, 'pretrained_model_remap_keys') and FLAGS.pretrained_model_remap_keys): new_checkpoint = {} new_keys = list(model_wrapper.state_dict().keys()) old_keys = list(checkpoint.keys()) for key_new, key_old in zip(new_keys, old_keys): new_checkpoint[key_new] = checkpoint[key_old] logging.info('remap {} to {}'.format(key_new, key_old)) checkpoint = new_checkpoint model_wrapper.load_state_dict(checkpoint) logging.info('Loaded model {}.'.format(FLAGS.pretrained)) optimizer = optim.get_optimizer(model_wrapper, FLAGS) # check resume training if FLAGS.resume: checkpoint = torch.load(os.path.join(FLAGS.resume, 'latest_checkpoint.pt'), map_location=lambda storage, loc: storage) model_wrapper.load_state_dict(checkpoint['model']) optimizer.load_state_dict(checkpoint['optimizer']) if ema: ema.load_state_dict(checkpoint['ema']) ema.to(get_device(model)) last_epoch = checkpoint['last_epoch'] lr_scheduler = optim.get_lr_scheduler(optimizer, FLAGS) lr_scheduler.last_epoch = (last_epoch + 1) * FLAGS._steps_per_epoch best_val = extract_item(checkpoint['best_val']) train_meters, val_meters = checkpoint['meters'] FLAGS._global_step = (last_epoch + 1) * FLAGS._steps_per_epoch if is_root_rank: logging.info('Loaded checkpoint {} at epoch {}.'.format( FLAGS.resume, last_epoch)) else: lr_scheduler = optim.get_lr_scheduler(optimizer, FLAGS) # last_epoch = lr_scheduler.last_epoch last_epoch = -1 best_val = 1. 
train_meters = get_meters('train') val_meters = get_meters('val') FLAGS._global_step = 0 if not FLAGS.resume and is_root_rank: logging.info(model_wrapper) assert FLAGS.profiling, '`m.macs` is used for calculating penalty' if FLAGS.profiling: if 'gpu' in FLAGS.profiling: profiling(model, use_cuda=True) if 'cpu' in FLAGS.profiling: profiling(model, use_cuda=False) # data (train_transforms, val_transforms, test_transforms) = dataflow.data_transforms(FLAGS) (train_set, val_set, test_set) = dataflow.dataset(train_transforms, val_transforms, test_transforms, FLAGS) (train_loader, calib_loader, val_loader, test_loader) = dataflow.data_loader(train_set, val_set, test_set, FLAGS) # get bn's weights FLAGS._bn_to_prune = prune.get_bn_to_prune(model, FLAGS.prune_params) rho_scheduler = prune.get_rho_scheduler(FLAGS.prune_params, FLAGS._steps_per_epoch) if FLAGS.test_only and (test_loader is not None): if is_root_rank: logging.info('Start testing.') test_meters = get_meters('test') validate(last_epoch, calib_loader, test_loader, criterion, test_meters, model_wrapper, ema, 'test') return # already broadcast by AllReduceDistributedDataParallel # optimizer load same checkpoint/same initialization if is_root_rank: logging.info('Start training.') for epoch in range(last_epoch + 1, FLAGS.num_epochs): # train results = run_one_epoch(epoch, train_loader, model_wrapper, criterion_smooth, optimizer, lr_scheduler, ema, rho_scheduler, train_meters, phase='train') # val results, model_eval_wrapper = validate(epoch, calib_loader, val_loader, criterion, val_meters, model_wrapper, ema, 'val') if FLAGS.prune_params['method'] is not None: prune_threshold = FLAGS.model_shrink_threshold masks = prune.cal_mask_network_slimming_by_threshold( get_prune_weights(model_eval_wrapper), prune_threshold) FLAGS._bn_to_prune.add_info_list('mask', masks) flops_pruned, infos = prune.cal_pruned_flops(FLAGS._bn_to_prune) log_pruned_info(unwrap_model(model_eval_wrapper), flops_pruned, infos, prune_threshold) if flops_pruned >= FLAGS.model_shrink_delta_flops \ or epoch == FLAGS.num_epochs - 1: ema_only = (epoch == FLAGS.num_epochs - 1) shrink_model(model_wrapper, ema, optimizer, FLAGS._bn_to_prune, prune_threshold, ema_only) model_kwparams = mb.output_network(unwrap_model(model_wrapper)) if results['top1_error'] < best_val: best_val = results['top1_error'] if is_root_rank: save_status(model_wrapper, model_kwparams, optimizer, ema, epoch, best_val, (train_meters, val_meters), os.path.join(FLAGS.log_dir, 'best_model')) logging.info( 'New best validation top1 error: {:.4f}'.format(best_val)) if is_root_rank: # save latest checkpoint save_status(model_wrapper, model_kwparams, optimizer, ema, epoch, best_val, (train_meters, val_meters), os.path.join(FLAGS.log_dir, 'latest_checkpoint')) # NOTE: from scheduler code, should be called after train/val # use stepwise scheduler instead # lr_scheduler.step() return
        classes=CONFIG.classes,
        se=True,
        activation="hswish",
        l_cfgs_name=CONFIG.model,
        seg_state=CONFIG.seg_state)

    if args.load_pretrained:
        pretrained_dict = load_state_dict(CONFIG.model_pretrained,
                                          use_ema=CONFIG.ema)
        model.load_state_dict(pretrained_dict, strict=False)
        logging.info("Load pretrained from {} to {}".format(
            CONFIG.model_pretrained, CONFIG.model))

    if (device.type == "cuda" and CONFIG.ngpu >= 1):
        model = model.to(device)
        model = nn.DataParallel(model, list(range(CONFIG.ngpu)))

    optimizer = get_optimizer(model.parameters(), CONFIG.optim_state)
    criterion = Loss(device, CONFIG)
    scheduler = get_lr_scheduler(optimizer, len(train_loader), CONFIG)

    start_time = time.time()
    trainer = Trainer(criterion, optimizer, scheduler, device, CONFIG)
    trainer.train_loop(train_loader, test_loader, model, fold)
    logging.info("Total training time : {:.2f}".format(time.time() -
                                                       start_time))
    logging.info(
        "=================================== Experiment title : {} End ==========================="
        .format(args.title))
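# `get_lr_scheduler` above receives len(train_loader), which suggests a
# schedule stepped once per batch rather than once per epoch. A plain-PyTorch
# analogue is OneCycleLR (a sketch with assumed hyper-parameters; the
# project's helper may build a different schedule):
import torch

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.SGD(params, lr=0.05, momentum=0.9)
steps_per_epoch = 100  # stand-in for len(train_loader)
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=0.05, epochs=90, steps_per_epoch=steps_per_epoch)
# inside the training loop: optimizer.step(); scheduler.step()  # once per batch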