def main():
    args = get_args()
    if not os.path.exists(args.save_folder):
        os.mkdir(args.save_folder)

    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50

    rgb_mean = (104, 117, 123)  # bgr order
    num_classes = 2
    img_dim = cfg["image_size"]
    num_gpu = cfg["ngpu"]
    batch_size = cfg["batch_size"]
    max_epoch = cfg["epoch"]
    gpu_train = cfg["gpu_train"]

    num_workers = args.num_workers
    momentum = args.momentum
    weight_decay = args.weight_decay
    initial_lr = args.lr
    gamma = args.gamma
    training_dataset = args.training_dataset
    save_folder = args.save_folder

    net = RetinaFace(cfg=cfg)
    print("Printing net...")
    print(net)

    if args.resume_net is not None:
        print("Loading resume network...")
        state_dict = torch.load(args.resume_net)
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == "module.":
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)

    if num_gpu > 1 and gpu_train:
        net = torch.nn.DataParallel(net).cuda()
    else:
        net = net.cuda()
    cudnn.benchmark = True

    optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay)
    criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

    priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    net.train()
    epoch = 0 + args.resume_epoch
    print("Loading Dataset...")

    dataset = WiderFaceDetection(training_dataset, preproc(img_dim, rgb_mean))

    epoch_size = math.ceil(len(dataset) / batch_size)
    max_iter = max_epoch * epoch_size

    stepvalues = (cfg["decay1"] * epoch_size, cfg["decay2"] * epoch_size)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(dataset, batch_size, shuffle=True,
                                num_workers=num_workers, collate_fn=detection_collate))
            if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg["decay1"]):
                torch.save(net.state_dict(),
                           save_folder + cfg["name"] + "_epoch_" + str(epoch) + ".pth")
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(initial_lr, optimizer, gamma, epoch, step_index, iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        images = images.cuda()
        targets = [anno.cuda() for anno in targets]

        # forward
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c, loss_landm = criterion(out, priors, targets)
        loss = cfg["loc_weight"] * loss_l + loss_c + loss_landm
        loss.backward()
        optimizer.step()

        load_t1 = time.time()
        batch_time = load_t1 - load_t0
        eta = int(batch_time * (max_iter - iteration))
        print("Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} "
              "|| LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}".format(
                  epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size,
                  iteration + 1, max_iter,
                  loss_l.item(), loss_c.item(), loss_landm.item(),
                  lr, batch_time, str(datetime.timedelta(seconds=eta))))

    torch.save(net.state_dict(), save_folder + cfg["name"] + "_Final.pth")
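
# The step-decay helper called above (`adjust_learning_rate`) is not defined in this file.
# Below is a minimal sketch of what it is assumed to do here, following the reference
# Pytorch_Retinaface training script: optional linear warm-up, then decay by `gamma` at
# each configured step. The `warmup_epoch=-1` default (warm-up disabled) and the 1e-6
# floor are assumptions, not taken from this file.
def adjust_learning_rate(initial_lr, optimizer, gamma, epoch, step_index, iteration, epoch_size, warmup_epoch=-1):
    """Warm up linearly for the first `warmup_epoch` epochs, then apply step decay;
    writes the new lr into every param group and returns it for logging."""
    if epoch <= warmup_epoch:
        lr = 1e-6 + (initial_lr - 1e-6) * iteration / (epoch_size * warmup_epoch)
    else:
        lr = initial_lr * (gamma ** step_index)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr
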
def main(args):
    # dataset
    rgb_mean = (104, 117, 123)  # bgr order
    dataset = MyDataset(args.txt_path, args.txt_path2, preproc(args.img_size, rgb_mean))
    dataloader = DataLoader(dataset, args.bs, shuffle=True, num_workers=args.num_workers,
                            collate_fn=detection_collate, pin_memory=True)

    # net and load
    net = RetinaFace(cfg=cfg_mnet)
    if args.resume_net is not None:
        print('Loading resume network...')
        state_dict = load_normal(args.resume_net)
        net.load_state_dict(state_dict)
        print('Loading success!')
    net = net.cuda()
    if torch.cuda.device_count() >= 1 and args.multi_gpu:
        net = torch.nn.DataParallel(net)

    # optimizer and loss
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    scheduler = WarmupCosineSchedule(optimizer, args.warm_epoch, args.max_epoch, len(dataloader), args.cycles)
    num_classes = 2
    criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

    # priorbox
    priorbox = PriorBox(cfg_mnet, image_size=(args.img_size, args.img_size))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    # save folder
    time_str = datetime.datetime.strftime(datetime.datetime.now(), '%y-%m-%d-%H-%M-%S')
    args.save_folder = os.path.join(args.save_folder, time_str)
    if not os.path.exists(args.save_folder):
        os.makedirs(args.save_folder)
    logger = logger_init(args.save_folder)
    logger.info(args)

    # train
    for i_epoch in range(args.max_epoch):
        net.train()
        for i_iter, data in enumerate(dataloader):
            load_t0 = time.time()
            images, targets = data[:2]
            images = images.cuda()
            targets = [anno.cuda() for anno in targets]

            # forward
            out = net(images)

            # backward
            optimizer.zero_grad()
            loss_l, loss_c, loss_landm = criterion(out, priors, targets)
            loss = cfg_mnet['loc_weight'] * loss_l + loss_c + loss_landm
            loss.backward()
            optimizer.step()
            scheduler.step()

            # print info
            load_t1 = time.time()
            batch_time = load_t1 - load_t0
            eta = int(batch_time * (len(dataloader) * (args.max_epoch - i_epoch) - i_iter))
            logger.info('Epoch:{}/{} || Iter: {}/{} || '
                        'Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || '
                        'LR: {:.8f} || '
                        'Batchtime: {:.4f} s || '
                        'ETA: {}'.format(
                            i_epoch + 1, args.max_epoch, i_iter + 1, len(dataloader),
                            loss_l.item(), loss_c.item(), loss_landm.item(),
                            optimizer.state_dict()['param_groups'][0]['lr'],
                            batch_time, str(datetime.timedelta(seconds=eta))))

        if (i_epoch + 1) % args.save_fre == 0:
            save_name = 'mobile0.25_' + str(i_epoch + 1) + '.pth'
            torch.save(net.state_dict(), os.path.join(args.save_folder, save_name))
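
# `WarmupCosineSchedule` (and `load_normal`) are project helpers not defined in this file.
# The sketch below shows one plausible LambdaLR-based implementation matching the
# constructor call above (warm-up epochs, total epochs, iterations per epoch, cosine
# cycles); the exact shape of the real schedule is an assumption.
import math
from torch.optim.lr_scheduler import LambdaLR

class WarmupCosineSchedule(LambdaLR):
    """Linear warm-up over `warm_epoch * iter_per_epoch` steps, then cosine decay over
    the remaining steps; `cycles=0.5` decays from the base lr to 0 over one half-cosine."""
    def __init__(self, optimizer, warm_epoch, max_epoch, iter_per_epoch, cycles=0.5, last_epoch=-1):
        self.warmup_steps = warm_epoch * iter_per_epoch
        self.total_steps = max_epoch * iter_per_epoch
        self.cycles = cycles
        super().__init__(optimizer, self.lr_lambda, last_epoch=last_epoch)

    def lr_lambda(self, step):
        if step < self.warmup_steps:
            return float(step) / float(max(1, self.warmup_steps))
        progress = float(step - self.warmup_steps) / float(max(1, self.total_steps - self.warmup_steps))
        return max(0.0, 0.5 * (1.0 + math.cos(math.pi * self.cycles * 2.0 * progress)))
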
def Train(self):
    self.setup()
    cfg = self.system_dict["local"]["cfg"]
    print(cfg)

    rgb_mean = (104, 117, 123)  # bgr order
    num_classes = 2
    img_dim = cfg['image_size']
    num_gpu = cfg['ngpu']
    batch_size = cfg['batch_size']
    max_epoch = cfg['epoch']
    gpu_train = cfg['gpu_train']

    num_workers = self.system_dict["params"]["num_workers"]
    momentum = self.system_dict["params"]["momentum"]
    weight_decay = self.system_dict["params"]["weight_decay"]
    initial_lr = self.system_dict["params"]["lr"]
    gamma = self.system_dict["params"]["gamma"]
    save_folder = self.system_dict["params"]["save_folder"]

    print("Loading Network...")
    net = RetinaFace(cfg=cfg)

    if self.system_dict["params"]["resume_net"] is not None:
        print('Loading resume network...')
        state_dict = torch.load(self.system_dict["params"]["resume_net"])
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)

    if num_gpu > 1 and gpu_train:
        net = torch.nn.DataParallel(net).cuda()
    else:
        net = net.cuda()
    cudnn.benchmark = True
    print("Done...")

    optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay)
    criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

    priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    net.train()
    epoch = 0 + self.system_dict["params"]["resume_epoch"]

    dataset = self.system_dict["local"]["dataset"]

    epoch_size = math.ceil(len(dataset) / batch_size)
    max_iter = max_epoch * epoch_size

    stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size)
    step_index = 0

    if self.system_dict["params"]["resume_epoch"] > 0:
        start_iter = self.system_dict["params"]["resume_epoch"] * epoch_size
    else:
        start_iter = 0

    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(
                data.DataLoader(dataset, batch_size, shuffle=True,
                                num_workers=num_workers, collate_fn=detection_collate))
            torch.save(net.state_dict(),
                       save_folder + "/" + cfg['name'] + '_intermediate.pth')
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = self.adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size, initial_lr)

        # load train data
        images, targets = next(batch_iterator)
        images = images.cuda()
        targets = [anno.cuda() for anno in targets]

        # forward
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c, loss_landm = criterion(out, priors, targets)
        loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm
        loss.backward()
        optimizer.step()

        load_t1 = time.time()
        batch_time = load_t1 - load_t0
        eta = int(batch_time * (max_iter - iteration))
        if iteration % 50 == 0:
            print('Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}'
                  .format(epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size,
                          iteration + 1, max_iter,
                          loss_l.item(), loss_c.item(), loss_landm.item(),
                          lr, batch_time, str(datetime.timedelta(seconds=eta))))

    torch.save(net.state_dict(), save_folder + "/" + cfg['name'] + '_Final.pth')
def train():
    # dataset = WiderFaceDetection(training_dataset, preproc(img_dim, rgb_mean), landmark_num)
    # dataset = Dataset300W(training_dataset, preproc(img_dim, rgb_mean))
    dataset = Dataset300W(training_dataset, preproc(img_dim, rgb_mean), landmark_indices)
    dataloader = data.DataLoader(dataset, batch_size, shuffle=True,
                                 num_workers=num_workers, collate_fn=detection_collate)

    net = RetinaFace(cfg=cfg)
    print("Printing net...")
    print(net)

    if args.resume_net is not None:
        print('Loading resume network...')
        load_net(net, args.resume_net)

    if num_gpu > 1 and gpu_train:
        net = torch.nn.DataParallel(net).cuda()
    else:
        net = net.cuda()
    cudnn.benchmark = True

    net.train()
    epoch = 0 + args.resume_epoch
    print('Loading Dataset...')

    optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay)
    criterion = MultiBoxLoss(num_classes, landmark_num, 0.35, True, 0, True, 7, 0.35, False)

    priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    epoch_size = math.ceil(len(dataset) / batch_size)
    max_iter = max_epoch * epoch_size

    stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            batch_iterator = iter(dataloader)
            if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg['decay1']):
                fullname = os.path.join(
                    save_folder,
                    cfg['name'] + '_landmark' + str(landmark_num) + '_epoch_' + str(epoch) + '.pth')
                torch.save(net.state_dict(), fullname)
            epoch += 1

        load_t0 = time.time()
        if iteration in stepvalues:
            step_index += 1
        lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        images = images.cuda()
        targets = [anno.cuda() for anno in targets]

        # forward
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c, loss_landm = criterion(out, priors, targets)
        loss = cfg['loc_weight'] * loss_l + loss_c + cfg['landmark_weight'] * loss_landm
        loss.backward()
        optimizer.step()

        load_t1 = time.time()
        batch_time = load_t1 - load_t0
        eta = int(batch_time * (max_iter - iteration))
        print('Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}'
              .format(epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size,
                      iteration + 1, max_iter,
                      loss_l.item(), loss_c.item(), loss_landm.item(),
                      lr, batch_time, str(datetime.timedelta(seconds=eta))))

    fullname = os.path.join(save_folder, cfg['name'] + '_landmark' + str(landmark_num) + '_Final.pth')
    torch.save(net.state_dict(), fullname)
    print("Training finished!")
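
# All of these scripts pass `detection_collate` to their DataLoaders. It has to stack the
# image tensors into one batch while keeping the per-image annotation arrays in a plain
# list, because each image contains a different number of faces. A minimal sketch,
# following the reference Pytorch_Retinaface data code (the exact project version may differ):
import numpy as np
import torch

def detection_collate(batch):
    """Stack image tensors into a single batch tensor; keep the variable-length
    annotation arrays as a list of float tensors, one per image."""
    imgs = []
    targets = []
    for sample in batch:
        for item in sample:
            if torch.is_tensor(item):
                imgs.append(item)
            elif isinstance(item, np.ndarray):
                targets.append(torch.from_numpy(item).float())
    return torch.stack(imgs, 0), targets
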
def train():
    net = RetinaFace(cfg=cfg)
    logger.info("Printing net...")
    logger.info(net)

    if args.resume_net is not None:
        logger.info('Loading resume network...')
        state_dict = torch.load(args.resume_net)
        # create new OrderedDict that does not contain `module.`
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in state_dict.items():
            head = k[:7]
            if head == 'module.':
                name = k[7:]  # remove `module.`
            else:
                name = k
            new_state_dict[name] = v
        net.load_state_dict(new_state_dict)

    if num_gpu > 1 and gpu_train:
        net = torch.nn.DataParallel(net).cuda()
    else:
        net = net.cuda()
    cudnn.benchmark = True

    priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
    with torch.no_grad():
        priors = priorbox.forward()
        priors = priors.cuda()

    net.train()
    epoch = 0 + args.resume_epoch
    logger.info('Loading Dataset...')

    trainset = WiderFaceDetection(training_dataset, preproc=train_preproc(img_dim, rgb_mean), mode='train')
    validset = WiderFaceDetection(training_dataset, preproc=valid_preproc(img_dim, rgb_mean), mode='valid')
    # trainset = WiderFaceDetection(training_dataset, transformers=train_transformers(img_dim), mode='train')
    # validset = WiderFaceDetection(training_dataset, transformers=valid_transformers(img_dim), mode='valid')
    trainloader = data.DataLoader(trainset, batch_size, shuffle=True,
                                  num_workers=num_workers, collate_fn=detection_collate)
    validloader = data.DataLoader(validset, batch_size, shuffle=True,
                                  num_workers=num_workers, collate_fn=detection_collate)
    logger.info(f'Totally {len(trainset)} training samples and {len(validset)} validating samples.')

    epoch_size = math.ceil(len(trainset) / batch_size)
    max_iter = max_epoch * epoch_size
    logger.info(f'max_epoch: {max_epoch:d} epoch_size: {epoch_size:d}, max_iter: {max_iter:d}')

    # optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay)
    optimizer = optim.Adam(net.parameters(), lr=initial_lr, weight_decay=weight_decay)
    scheduler = _utils.get_linear_schedule_with_warmup(optimizer, int(0.1 * max_iter), max_iter)
    criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

    stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size)
    step_index = 0

    if args.resume_epoch > 0:
        start_iter = args.resume_epoch * epoch_size
    else:
        start_iter = 0

    best_loss_val = float('inf')
    for iteration in range(start_iter, max_iter):
        if iteration % epoch_size == 0:
            # create batch iterator
            # batch_iterator = iter(tqdm(trainloader, total=len(trainloader)))
            batch_iterator = iter(trainloader)
            # if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg['decay1']):
            #     torch.save(net.state_dict(), save_folder + cfg['name'] + '_epoch_' + str(epoch) + '.pth')
            epoch += 1
            torch.cuda.empty_cache()

        if (valid_steps > 0) and (iteration > 0) and (iteration % valid_steps == 0):
            net.eval()
            # validation
            loss_l_val = 0.
            loss_c_val = 0.
            loss_landm_val = 0.
            loss_val = 0.
            # for val_no, (images, targets) in tqdm(enumerate(validloader), total=len(validloader)):
            for val_no, (images, targets) in enumerate(validloader):
                # load data
                images = images.cuda()
                targets = [anno.cuda() for anno in targets]

                # forward
                with torch.no_grad():
                    out = net(images)
                    loss_l, loss_c, loss_landm = criterion(out, priors, targets)
                    loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm
                loss_l_val += loss_l.item()
                loss_c_val += loss_c.item()
                loss_landm_val += loss_landm.item()
                loss_val += loss.item()

            loss_l_val /= len(validloader)
            loss_c_val /= len(validloader)
            loss_landm_val /= len(validloader)
            loss_val /= len(validloader)
            logger.info('[Validating] Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Total: {:.4f} Loc: {:.4f} Cla: {:.4f} Landm: {:.4f}'
                        .format(epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size,
                                iteration + 1, max_iter,
                                loss_val, loss_l_val, loss_c_val, loss_landm_val))

            if loss_val < best_loss_val:
                best_loss_val = loss_val
                pth = os.path.join(save_folder, cfg['name'] + '_iter_' + str(iteration) + f'_{loss_val:.4f}_' + '.pth')
                torch.save(net.state_dict(), pth)
                logger.info(f'Best validating loss: {best_loss_val:.4f}, model saved as {pth:s}')
            net.train()

        load_t0 = time.time()
        # if iteration in stepvalues:
        #     step_index += 1
        # lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size)

        # load train data
        images, targets = next(batch_iterator)
        images = images.cuda()
        targets = [anno.cuda() for anno in targets]

        # forward
        out = net(images)

        # backprop
        optimizer.zero_grad()
        loss_l, loss_c, loss_landm = criterion(out, priors, targets)
        loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm
        loss.backward()
        optimizer.step()
        scheduler.step()

        load_t1 = time.time()
        batch_time = load_t1 - load_t0
        eta = int(batch_time * (max_iter - iteration))
        if iteration % verbose_steps == 0:
            logger.info('[Training] Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Total: {:.4f} Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}'
                        .format(epoch, max_epoch, (iteration % epoch_size) + 1, epoch_size,
                                iteration + 1, max_iter,
                                loss.item(), loss_l.item(), loss_c.item(), loss_landm.item(),
                                scheduler.get_last_lr()[-1], batch_time,
                                str(datetime.timedelta(seconds=eta))))
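
# `_utils.get_linear_schedule_with_warmup` used in the function above is a project helper;
# its name mirrors the Hugging Face `transformers` scheduler of the same name. The
# LambdaLR-based sketch below is an assumption about its behaviour: the lr ramps linearly
# from 0 to the base lr over the warm-up steps, then decays linearly to 0 by the last step.
from torch.optim.lr_scheduler import LambdaLR

def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps, last_epoch=-1):
    """Return a LambdaLR implementing linear warm-up followed by linear decay to zero."""
    def lr_lambda(current_step):
        if current_step < num_warmup_steps:
            return float(current_step) / float(max(1, num_warmup_steps))
        return max(0.0, float(num_training_steps - current_step) /
                        float(max(1, num_training_steps - num_warmup_steps)))
    return LambdaLR(optimizer, lr_lambda, last_epoch)
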