def train():
    data = config['data']
    img_size, img_size_test = config['img_size'] if len(config['img_size']) == 2 else config['img_size'] * 2  # train, test sizes
    epochs = config['epochs']  # 500200 batches at bs 64, 117263 images = 273 epochs
    batch_size = config['batch_size']
    accumulate = config['accumulate']  # effective bs = batch_size * accumulate = 16 * 4 = 64

    # Initialize
    init_seeds(config['seed'])
    if config['multi_scale']:
        img_sz_min = round(img_size / 32 / 1.5)
        img_sz_max = round(img_size / 32 * 1.5)
        img_size = img_sz_max * 32  # initiate with maximum multi_scale size
        print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size))

    # Configure run
    data_dict = parse_data_cfg(data)
    nc = int(data_dict['classes'])  # number of classes

    # Initialize Teacher
    if config['teacher_darknet'] == 'default':
        teacher = Darknet(cfg=config['teacher_cfg'], arc=config['teacher_arc']).to(device)
    elif config['teacher_darknet'] == 'soft':
        teacher = SoftDarknet(cfg=config['teacher_cfg'], arc=config['teacher_arc']).to(device)
    # Initialize Student
    if config['student_darknet'] == 'default':
        if 'nano' in config['student_cfg']:
            print('Using a YOLO Nano arc')
            student = YOLO_Nano(config['student_cfg']).to(device)
        else:
            student = Darknet(cfg=config['student_cfg']).to(device)
    elif config['student_darknet'] == 'soft':
        student = SoftDarknet(cfg=config['student_cfg'], arc=config['student_arc']).to(device)
    # Create Discriminators
    D_models = None
    if len(config['teacher_indexes']):
        D_models = Discriminator(config['teacher_indexes'], teacher, config['D_kernel_size'], False).to(device)

    G_optim = create_optimizer(student, config)
    D_optim = create_optimizer(D_models, config, is_D=True)
    GAN_criterion = torch.nn.BCEWithLogitsLoss()

    mask = None
    if ('mask' in config and config['mask']) or ('mask_path' in config and config['mask_path']):
        print('Creating mask')
        mask = create_mask_LTH(teacher).to(device)

    start_epoch, best_fitness, teacher, student, mask, D_models, G_optim, D_optim = load_kd_checkpoints(
        config, teacher, student, mask, D_models, G_optim, D_optim, device
    )

    if mask is not None:
        print('Applying mask in teacher')
        apply_mask_LTH(teacher, mask)
        del mask
        torch.cuda.empty_cache()

    if config['xavier_norm']:
        initialize_model(student, torch.nn.init.xavier_normal_)
    elif config['xavier_uniform']:
        initialize_model(student, torch.nn.init.xavier_uniform_)

    G_scheduler = create_scheduler(config, G_optim, start_epoch)
    D_scheduler = create_scheduler(config, D_optim, start_epoch)

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        student, G_optim = amp.initialize(student, G_optim, opt_level='O1', verbosity=0)

    # Initialize distributed training
    if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
        dist.init_process_group(backend='nccl',  # 'distributed backend'
                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
                                world_size=1,  # number of nodes for distributed training
                                rank=0)  # distributed training node rank
        teacher = torch.nn.parallel.DistributedDataParallel(teacher, find_unused_parameters=True)
        teacher.yolo_layers = teacher.module.yolo_layers  # move yolo layer indices to top level
        student = torch.nn.parallel.DistributedDataParallel(student, find_unused_parameters=True)
        student.yolo_layers = student.module.yolo_layers  # move yolo layer indices to top level

    trainloader, validloader = create_dataloaders(config)

    # Start training
    nb = len(trainloader)
    prebias = start_epoch == 0
    student.nc = nc  # attach number of classes to student
    teacher.nc = nc
    student.arc = config['student_arc']  # attach yolo architecture
    teacher.arc = config['teacher_arc']
    student.hyp = config['hyp']  # attach hyperparameters to student
    teacher.hyp = config['hyp']
    student.class_weights = labels_to_class_weights(trainloader.dataset.labels, nc).to(device)  # attach class weights
    teacher.class_weights = student.class_weights
    maps = np.zeros(nc)  # mAP per class
    # torch.autograd.set_detect_anomaly(True)
    results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    t0 = time.time()
    torch_utils.model_info(student, report='summary')  # 'full' or 'summary'
    print('Starting training for %g epochs...' % epochs)

    teacher.train()
    max_wo_best = 0
    ###############
    # Start epoch #
    ###############
    for epoch in range(start_epoch, epochs):
        student.train()
        student.gr = 1 - (1 + math.cos(min(epoch * 2, epochs) * math.pi / epochs)) / 2  # GIoU <-> 1.0 loss ratio

        # Prebias
        if prebias:
            ne = max(round(30 / nb), 3)  # number of prebias epochs
            ps = np.interp(epoch, [0, ne], [0.1, config['hyp']['lr0'] * 2]), \
                 np.interp(epoch, [0, ne], [0.9, config['hyp']['momentum']])  # prebias settings (lr=0.1, momentum=0.9)
            if epoch == ne:
                print_model_biases(student)
                prebias = False

            # Bias optimizer settings
            G_optim.param_groups[2]['lr'] = ps[0]
            if G_optim.param_groups[2].get('momentum') is not None:  # for SGD but not Adam
                G_optim.param_groups[2]['momentum'] = ps[1]

        # Update image weights (optional)
        if trainloader.dataset.image_weights:
            w = student.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
            image_weights = labels_to_image_weights(trainloader.dataset.labels, nc=nc, class_weights=w)
            trainloader.dataset.indices = random.choices(range(trainloader.dataset.n),
                                                         weights=image_weights,
                                                         k=trainloader.dataset.n)  # rand weighted idx

        mloss = torch.zeros(9).to(device)  # mean losses
        print(('\n' + '%10s' * 13) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'G_loss', 'D_loss',
                                      'D_x', 'D_g_z1', 'D_g_z2', 'total', 'targets', 'img_size'))
        pbar = tqdm(enumerate(trainloader), total=nb)  # progress bar
        ####################
        # Start mini-batch #
        ####################
        for i, (imgs, targets, paths, _) in pbar:
            real_data_label = ft(imgs.shape[0], device=device).uniform_(.7, 1.0)
            fake_data_label = ft(imgs.shape[0], device=device).uniform_(.0, .3)

            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            # Plot images with bounding boxes
            if ni < 1:
                f = config['sub_working_dir'] + 'train_batch%g.png' % i  # filename
                plot_images(imgs=imgs, targets=targets, paths=paths, fname=f)
                if tb_writer:
                    tb_writer.add_image(f, cv2.imread(f)[:, :, ::-1], dataformats='HWC')

            # Multi-Scale training
            if config['multi_scale']:
                if ni / accumulate % 1 == 0:  # adjust img_size (67% - 150%) every 1 batch
                    img_size = random.randrange(img_sz_min, img_sz_max + 1) * 32
                sf = img_size / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / 32.) * 32 for x in imgs.shape[2:]]  # new shape (stretched to 32-multiple)
                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Run student
            if len(config['student_indexes']) and epoch < config['second_stage']:
                pred_std, fts_std = student(imgs, config['student_indexes'])
                if 'nano' in config['student_cfg']:  # YOLO Nano outputs in the reversed order
                    fts_std.reverse()
            else:
                pred_std = student(imgs)

            ###################################################
            # Update D: maximize log(D(x)) + log(1 - D(G(z))) #
            ###################################################
            D_loss_real, D_loss_fake, D_x, D_g_z1 = ft([.0]), ft([.0]), ft([.0]), ft([.0])
            if epoch < config['second_stage']:
                # Run teacher
                with torch.no_grad():
                    _, fts_tch = teacher(imgs, config['teacher_indexes'])

                # Adding noise to Discriminator: flipping labels
                if random.random() < .05:
                    aux = real_data_label
                    real_data_label = fake_data_label
                    fake_data_label = aux

                # Discriminate the real data
                real_data_discrimination = D_models(fts_tch)
                for output in real_data_discrimination:
                    D_x += output.mean().item() / 3.
                # Discriminate the fake data
                fake_data_discrimination = D_models([x.detach() for x in fts_std])
                for output in fake_data_discrimination:
                    D_g_z1 += output.mean().item() / 3.

                # Compute loss
                for x in real_data_discrimination:
                    D_loss_real += GAN_criterion(x.view(-1), real_data_label)
                for x in fake_data_discrimination:
                    D_loss_fake += GAN_criterion(x.view(-1), fake_data_label)

                # Scale loss by nominal batch_size of 64
                D_loss_real *= batch_size / 64
                D_loss_fake *= batch_size / 64

                # Compute gradient
                D_loss_real.backward()
                D_loss_fake.backward()

                # Optimize accumulated gradient
                if ni % accumulate == 0:
                    D_optim.step()
                    D_optim.zero_grad()

            ###################################
            # Update G: maximize log(D(G(z))) #
            ###################################
            G_loss, D_g_z2 = ft([.0]), ft([.0])
            if epoch < config['second_stage']:
                # Since we already updated D, perform another forward with the fake batch through D
                fake_data_discrimination = D_models([x.detach() for x in fts_std])
                for output in fake_data_discrimination:
                    D_g_z2 += output.mean().item() / 3.

                # Compute loss
                real_data_label = torch.ones(imgs.shape[0], device=device)
                for x in fake_data_discrimination:
                    G_loss += GAN_criterion(x.view(-1), real_data_label)  # fake labels are real for generator cost

                # Scale loss by nominal batch_size of 64
                G_loss *= batch_size / 64
                # Compute gradient
                G_loss.backward()

            # Compute loss
            obj_detec_loss, loss_items = compute_loss(pred_std, targets, student)
            # Scale loss by nominal batch_size of 64
            obj_detec_loss *= batch_size / 64
            if epoch < config['second_stage']:
                obj_detec_loss *= .05
            # Compute gradient
            obj_detec_loss.backward()

            # Optimize accumulated gradient
            if ni % accumulate == 0:
                G_optim.step()
                G_optim.zero_grad()

            D_loss = D_loss_real + D_loss_fake
            total_loss = obj_detec_loss + D_loss + G_loss
            all_losses = torch.cat([loss_items[:3], G_loss, D_loss, D_x, D_g_z1, D_g_z2, total_loss]).detach()
            if not torch.isfinite(total_loss):
                print('WARNING: non-finite loss, ending training ', all_losses)
                return results

            # Print batch results
            mloss = (mloss * i + all_losses) / (i + 1)  # update mean losses
            mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
            s = ('%10s' * 2 + '%10.3g' * 11) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, len(targets), img_size)
            pbar.set_description(s)
            ##################
            # End mini-batch #
            ##################

        # Update scheduler
        G_scheduler.step()
        D_scheduler.step()

        final_epoch = epoch + 1 == config['epochs']
        if not config['notest'] or final_epoch:  # Calculate mAP
            results, maps = guarantee_test(student, config, device, config['student_cfg'], data,
                                           batch_size, img_size_test, validloader, final_epoch, test.test)

        # Write epoch results
        with open(config['results_file'], 'a') as f:
            f.write(s + '%10.3g' * 7 % results + '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
        if len(config['name']) and config['bucket']:
            os.system('gsutil cp results.txt gs://%s/results/results%s.txt' % (config['bucket'], config['name']))

        # Write Tensorboard results
        if tb_writer:
            x = list(mloss) + list(results)
            titles = ['GIoU', 'Objectness', 'Classification', 'Generator Loss', 'Discriminator Loss',
                      'D_x', 'D_g_z1', 'D_g_z2', 'Train Loss',
                      'Precision', 'Recall', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification']
            for xi, title in zip(x, titles):
                tb_writer.add_scalar(title, xi, epoch)

        # Update best mAP
        fi = fitness(np.array(results).reshape(1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
        if fi > best_fitness:
            best_fitness = fi
            max_wo_best = 0
        else:
            max_wo_best += 1
            if config['early_stop'] and max_wo_best == config['early_stop']:
                print('Ending training due to early stop')

        # Save training results
        save = (not config['nosave']) or (final_epoch and not config['evolve'])
        if save:
            with open(config['results_file'], 'r') as f:
                # Create checkpoint
                chkpt = {'epoch': epoch,
                         'best_fitness': best_fitness,
                         'training_results': f.read(),
                         'model': student.module.state_dict() if type(student) is nn.parallel.DistributedDataParallel
                                  else student.state_dict(),
                         'D': D_models.state_dict(),
                         'G_optim': None if final_epoch else G_optim.state_dict(),
                         'D_optim': None if final_epoch else D_optim.state_dict()}

            # Save last checkpoint
            torch.save(chkpt, config['last'])

            # Save best checkpoint
            if best_fitness == fi:
                torch.save(chkpt, config['best_gan'] if epoch < config['second_stage'] else config['best'])

            # Delete checkpoint
            del chkpt
            torch.cuda.empty_cache()

        if config['early_stop'] and max_wo_best == config['early_stop']:
            break
        #############
        # End epoch #
        #############

    n = config['name']
    if len(n):
        n = '_' + n if not n.isnumeric() else n
        fresults, flast, fbest = 'results%s.txt' % n, 'last%s.pt' % n, 'best%s.pt' % n
        os.rename(config['results_file'], config['sub_working_dir'] + fresults)
        os.rename(config['last'], config['sub_working_dir'] + flast) if os.path.exists(config['last']) else None
        os.rename(config['best'], config['sub_working_dir'] + fbest) if os.path.exists(config['best']) else None
        # Updating results, last and best
        config['results_file'] = config['sub_working_dir'] + fresults
        config['last'] = config['sub_working_dir'] + flast
        config['best'] = config['sub_working_dir'] + fbest

        if config['bucket']:  # save to cloud
            os.system('gsutil cp %s gs://%s/results' % (fresults, config['bucket']))
            os.system('gsutil cp %s gs://%s/weights' % (config['sub_working_dir'] + flast, config['bucket']))
            # os.system('gsutil cp %s gs://%s/weights' % (config['sub_working_dir'] + fbest, config['bucket']))

    if not config['evolve']:
        plot_results(folder=config['sub_working_dir'])

    print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
    dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
    torch.cuda.empty_cache()
    return results
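# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): the discriminator update
# above relies on two common GAN stabilization tricks -- label smoothing (real
# targets drawn from [0.7, 1.0], fake targets from [0.0, 0.3]) and an occasional
# label flip (about 5% of batches). The helper below restates that idea in
# isolation; the function name make_discriminator_labels is hypothetical.
# ---------------------------------------------------------------------------
import random
import torch


def make_discriminator_labels(batch_size, device, flip_prob=0.05):
    """Return (real_labels, fake_labels) with smoothing and optional flipping."""
    real = torch.empty(batch_size, device=device).uniform_(0.7, 1.0)  # smoothed "real" targets
    fake = torch.empty(batch_size, device=device).uniform_(0.0, 0.3)  # smoothed "fake" targets
    if random.random() < flip_prob:  # occasionally swap roles to inject label noise
        real, fake = fake, real
    return real, fake


# Usage sketch: feed the smoothed labels to BCEWithLogitsLoss exactly as the
# training loop above does with GAN_criterion.
if __name__ == '__main__':
    criterion = torch.nn.BCEWithLogitsLoss()
    logits = torch.randn(8)  # stand-in for one discriminator head's output
    real_labels, fake_labels = make_discriminator_labels(8, device='cpu')
    print(float(criterion(logits, real_labels)))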
def train():
    data = config['data']
    img_size, img_size_test = config['img_size'] if len(config['img_size']) == 2 else config['img_size'] * 2  # train, test sizes
    epochs = config['epochs']  # 500200 batches at bs 64, 117263 images = 273 epochs
    batch_size = config['batch_size']
    accumulate = config['accumulate']  # effective bs = batch_size * accumulate = 16 * 4 = 64

    # Initialize
    init_seeds(config['seed'])
    if config['multi_scale']:
        img_sz_min = round(img_size / 32 / 1.5)
        img_sz_max = round(img_size / 32 * 1.5)
        img_size = img_sz_max * 32  # initiate with maximum multi_scale size
        print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size))

    # Configure run
    data_dict = parse_data_cfg(data)
    nc = int(data_dict['classes'])  # number of classes
    config['single_cls'] = nc == 1

    # Initialize Teacher
    if config['teacher_darknet'] == 'default':
        teacher = Darknet(cfg=config['teacher_cfg'], arc=config['teacher_arc']).to(device)
    elif config['teacher_darknet'] == 'soft':
        teacher = SoftDarknet(cfg=config['teacher_cfg'], arc=config['teacher_arc']).to(device)
    # Initialize Student
    if config['student_darknet'] == 'default':
        if 'nano' in config['student_cfg']:
            print('Using a YOLO Nano arc')
            student = YOLO_Nano(config['student_cfg']).to(device)
        else:
            student = Darknet(cfg=config['student_cfg']).to(device)
    elif config['student_darknet'] == 'soft':
        student = SoftDarknet(cfg=config['student_cfg'], arc=config['student_arc']).to(device)
    # Create Hint Layers
    hint_models = None
    if len(config['teacher_indexes']):
        hint_models = HintModel(config, teacher, student).to(device)

    optimizer = create_optimizer(student, config)
    if len(config['teacher_indexes']):
        add_to_optimizer(config, hint_models, optimizer)
    HINT = nn.L1Loss()

    mask = None
    if ('mask' in config and config['mask']) or ('mask_path' in config and config['mask_path']):
        print('Creating mask')
        mask = create_mask_LTH(teacher).to(device)

    start_epoch, best_fitness, teacher, student, mask, hint_models, optimizer, _ = load_kd_checkpoints(
        config, teacher, student, mask, hint_models, optimizer, None, device
    )

    if mask is not None:
        print('Applying mask in teacher')
        apply_mask_LTH(teacher, mask)
        del mask
        torch.cuda.empty_cache()

    if config['xavier_norm']:
        initialize_model(student, torch.nn.init.xavier_normal_)
    elif config['xavier_uniform']:
        initialize_model(student, torch.nn.init.xavier_uniform_)

    scheduler = create_scheduler(config, optimizer, start_epoch)

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        student, optimizer = amp.initialize(student, optimizer, opt_level='O1', verbosity=0)

    # Initialize distributed training
    if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
        dist.init_process_group(backend='nccl',  # 'distributed backend'
                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
                                world_size=1,  # number of nodes for distributed training
                                rank=0)  # distributed training node rank
        teacher = torch.nn.parallel.DistributedDataParallel(teacher, find_unused_parameters=True)
        teacher.yolo_layers = teacher.module.yolo_layers  # move yolo layer indices to top level
        student = torch.nn.parallel.DistributedDataParallel(student, find_unused_parameters=True)
        student.yolo_layers = student.module.yolo_layers  # move yolo layer indices to top level

    trainloader, validloader = create_dataloaders(config)

    # Start training
    nb = len(trainloader)
    prebias = start_epoch == 0
    student.nc = nc  # attach number of classes to student
    teacher.nc = nc
    student.arc = config['student_arc']  # attach yolo architecture
    teacher.arc = config['teacher_arc']
    student.hyp = config['hyp']  # attach hyperparameters to student
    teacher.hyp = config['hyp']  # attach hyperparameters to teacher
    h = config['hyp']  # shorthand for the hyperparameter dict
    mu = ft([h['mu']])  # mu weights the hard lcls and soft lcls in Eq. 2 (value not informed)
    # nu ('ni' in the config) weights the teacher bounded regression loss; renamed locally
    # so it is not overwritten by the batch counter ni defined inside the mini-batch loop
    nu = ft([h['ni']])
    margin = ft([h['margin']])  # m, the margin used in the teacher bounded regression loss (value not informed)
    student.class_weights = labels_to_class_weights(trainloader.dataset.labels, nc).to(device)  # attach class weights
    teacher.class_weights = student.class_weights
    maps = np.zeros(nc)  # mAP per class
    # torch.autograd.set_detect_anomaly(True)
    results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    t0 = time.time()
    torch_utils.model_info(student, report='summary')  # 'full' or 'summary'
    print('Starting training for %g epochs...' % epochs)

    teacher.eval()
    max_wo_best = 0
    ###############
    # Start epoch #
    ###############
    for epoch in range(start_epoch, epochs):
        student.train()
        student.gr = 1 - (1 + math.cos(min(epoch * 2, epochs) * math.pi / epochs)) / 2  # GIoU <-> 1.0 loss ratio

        # Prebias
        if prebias:
            ne = max(round(30 / nb), 3)  # number of prebias epochs
            ps = np.interp(epoch, [0, ne], [0.1, config['hyp']['lr0'] * 2]), \
                 np.interp(epoch, [0, ne], [0.9, config['hyp']['momentum']])  # prebias settings (lr=0.1, momentum=0.9)
            if epoch == ne:
                print_model_biases(student)
                prebias = False

            # Bias optimizer settings
            optimizer.param_groups[2]['lr'] = ps[0]
            if optimizer.param_groups[2].get('momentum') is not None:  # for SGD but not Adam
                optimizer.param_groups[2]['momentum'] = ps[1]

        # Update image weights (optional)
        if trainloader.dataset.image_weights:
            w = student.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
            image_weights = labels_to_image_weights(trainloader.dataset.labels, nc=nc, class_weights=w)
            trainloader.dataset.indices = random.choices(range(trainloader.dataset.n),
                                                         weights=image_weights,
                                                         k=trainloader.dataset.n)  # rand weighted idx

        mloss = torch.zeros(5).to(device)  # mean losses
        print(('\n' + '%10s' * 9) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'hint', 'total', 'targets', 'img_size'))
        pbar = tqdm(enumerate(trainloader), total=nb)  # progress bar
        ####################
        # Start mini-batch #
        ####################
        for i, (imgs, targets, paths, _) in pbar:
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            # Plot images with bounding boxes
            if ni < 1:
                f = config['sub_working_dir'] + 'train_batch%g.png' % i  # filename
                plot_images(imgs=imgs, targets=targets, paths=paths, fname=f)
                if tb_writer:
                    tb_writer.add_image(f, cv2.imread(f)[:, :, ::-1], dataformats='HWC')

            # Multi-Scale training
            if config['multi_scale']:
                if ni / accumulate % 1 == 0:  # adjust img_size (67% - 150%) every 1 batch
                    img_size = random.randrange(img_sz_min, img_sz_max + 1) * 32
                sf = img_size / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / 32.) * 32 for x in imgs.shape[2:]]  # new shape (stretched to 32-multiple)
                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Run teacher
            with torch.no_grad():
                inf_out, tch_train_output, fts_tch = teacher(imgs, config['teacher_indexes'])
                tch_loss = compute_loss(tch_train_output, targets, teacher, True)
                bboxes_tch = non_max_suppression(inf_out, conf_thres=.1, iou_thres=0.6)

            # Creating labels from teacher outputs
            targets_tch = torch.Tensor()
            for j, detections in enumerate(bboxes_tch):  # a list of detections per image
                if detections is not None and len(detections):
                    for *xyxy, _, cls_tch in detections:  # ignoring the confidence
                        xyxy = torch.Tensor(xyxy)
                        if len(xyxy.shape) == 1:
                            xyxy = xyxy.view(-1, *xyxy.shape)
                        l = torch.Tensor(len(xyxy), 6)
                        # the boxes are unnormalized; if not multi_scale, width != height
                        xyxy[:, (0, 2)] /= imgs.shape[2]
                        xyxy[:, (1, 3)] /= imgs.shape[3]
                        l[:, 0] = j  # the j-th image
                        l[:, 1] = cls_tch  # classes
                        l[:, 2:] = xyxy2xywh(xyxy)  # bboxes in darknet format
                        targets_tch = torch.cat([targets_tch, l])
            targets_tch = targets_tch.to(device)

            # Run student
            pred_std, fts_std = student(imgs, config['student_indexes'])
            # Run hint layers
            fts_guided = hint_models(fts_std)

            ################
            # Compute loss #
            ################
            hard_loss = compute_loss(pred_std, targets, student, True)
            soft_loss = compute_loss(pred_std, targets_tch, student, True)
            # Loss = Loss Hard + Loss Soft
            upper_bound_lreg = hard_loss[0] if hard_loss[0] + margin > tch_loss[0] else ft([.0])
            lbox = hard_loss[0] + nu * upper_bound_lreg  # Equation 4
            lobj = hard_loss[1]
            lcls = mu * hard_loss[2] + (1. - mu) * soft_loss[2]  # Equation 2
            lhint = torch.cuda.FloatTensor([.0])
            for (hint, guided) in zip(fts_tch, fts_guided):
                lhint += HINT(guided, hint)  # Equation 6
            loss = lbox + lobj + lcls + lhint
            loss_items = torch.cat((lbox, lobj, lcls, lhint, loss)).detach()

            if not torch.isfinite(loss):
                print('WARNING: non-finite loss, ending training ', loss_items)
                return results

            # Scale loss by nominal batch_size of 64
            loss *= batch_size / 64

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Optimize accumulated gradient
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
            s = ('%10s' * 2 + '%10.3g' * 7) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, len(targets), img_size)
            pbar.set_description(s)
            ##################
            # End mini-batch #
            ##################

        # Update scheduler
        scheduler.step()

        final_epoch = epoch + 1 == epochs
        if not config['notest'] or final_epoch:  # Calculate mAP
            teacher = teacher.to('cpu')
            hint_models = hint_models.to('cpu')
            results, maps = guarantee_test(student, config, device, config['cfg'], data,
                                           batch_size, img_size_test, validloader, final_epoch, test.test)
            teacher = teacher.to(device)
            hint_models = hint_models.to(device)

        # Write epoch results
        with open(config['results_file'], 'a') as f:
            f.write(s + '%10.3g' * 7 % results + '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
        if len(config['name']) and config['bucket']:
            os.system('gsutil cp results.txt gs://%s/results/results%s.txt' % (config['bucket'], config['name']))

        # Write Tensorboard results
        if tb_writer:
            x = list(mloss) + list(results)
            titles = ['GIoU', 'Objectness', 'Classification', 'Hint', 'Train loss',
                      'Precision', 'Recall', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification']
            for xi, title in zip(x, titles):
                tb_writer.add_scalar(title, xi, epoch)

        # Update best mAP
        fi = fitness(np.array(results).reshape(1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
        if fi > best_fitness:
            best_fitness = fi
            max_wo_best = 0
        else:
            max_wo_best += 1
            if config['early_stop'] and max_wo_best == config['early_stop']:
                print('Ending training due to early stop')

        # Save training results
        save = (not config['nosave']) or (final_epoch and not config['evolve'])
        if save:
            with open(config['results_file'], 'r') as f:
                # Create checkpoint
                chkpt = {'epoch': epoch,
                         'best_fitness': best_fitness,
                         'training_results': f.read(),
                         'model': student.module.state_dict() if type(student) is nn.parallel.DistributedDataParallel
                                  else student.state_dict(),
                         'hint': None if hint_models is None
                                 else hint_models.module.state_dict() if type(hint_models) is nn.parallel.DistributedDataParallel
                                 else hint_models.state_dict(),
                         'optimizer': None if final_epoch else optimizer.state_dict()}

            # Save last checkpoint
            torch.save(chkpt, config['last'])

            # Save best checkpoint
            if best_fitness == fi:
                torch.save(chkpt, config['best'])

            # Delete checkpoint
            del chkpt
            torch.cuda.empty_cache()

        if config['early_stop'] and max_wo_best == config['early_stop']:
            break
        #############
        # End epoch #
        #############

    n = config['name']
    if len(n):
        n = '_' + n if not n.isnumeric() else n
        fresults, flast, fbest = 'results%s.txt' % n, 'last%s.pt' % n, 'best%s.pt' % n
        os.rename(config['results_file'], config['sub_working_dir'] + fresults)
        os.rename(config['last'], config['sub_working_dir'] + flast) if os.path.exists(config['last']) else None
        os.rename(config['best'], config['sub_working_dir'] + fbest) if os.path.exists(config['best']) else None
        # Updating results, last and best
        config['results_file'] = config['sub_working_dir'] + fresults
        config['last'] = config['sub_working_dir'] + flast
        config['best'] = config['sub_working_dir'] + fbest

        if config['bucket']:  # save to cloud
            os.system('gsutil cp %s gs://%s/results' % (fresults, config['bucket']))
            os.system('gsutil cp %s gs://%s/weights' % (config['sub_working_dir'] + flast, config['bucket']))
            # os.system('gsutil cp %s gs://%s/weights' % (config['sub_working_dir'] + fbest, config['bucket']))

    if not config['evolve']:
        plot_results(folder=config['sub_working_dir'])

    print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
    dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
    torch.cuda.empty_cache()
    return results
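# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): the loss assembled in the
# mini-batch loop above combines the three distillation terms referenced as
# Equations 2, 4 and 6 in the comments. The helper below restates that
# combination on pre-computed components; the name distillation_loss and its
# argument layout are assumptions, not the repository's API.
# ---------------------------------------------------------------------------
import torch
import torch.nn as nn


def distillation_loss(hard, soft, tch_box_loss, hints, guided, mu, nu, margin):
    """hard/soft: (lbox, lobj, lcls) from the student against ground-truth and
    against teacher-generated targets; hints/guided: lists of teacher feature
    maps and adapted student feature maps with matching shapes."""
    l1 = nn.L1Loss()
    # Teacher-bounded regression (Eq. 4): only keep the extra box term while the
    # student's box loss is not yet within `margin` of the teacher's box loss.
    upper_bound = hard[0] if hard[0] + margin > tch_box_loss else torch.zeros_like(hard[0])
    lbox = hard[0] + nu * upper_bound
    lobj = hard[1]
    # Weighted hard/soft classification loss (Eq. 2).
    lcls = mu * hard[2] + (1.0 - mu) * soft[2]
    # Hint loss between adapted student features and teacher features (Eq. 6).
    lhint = sum(l1(g, h) for g, h in zip(guided, hints))
    return lbox + lobj + lcls + lhint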
init_seeds(config['seed'])
if config['multi_scale']:
    img_sz_min = round(img_size / 32 / 1.5)
    img_sz_max = round(img_size / 32 * 1.5)
    img_size = img_sz_max * 32  # initiate with maximum multi_scale size
    print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size))

# Configure run
data_dict = parse_data_cfg(data)
train_path = data_dict['train']
test_path = data_dict['valid']
nc = int(data_dict['classes'])  # number of classes

# Initialize model
model = SoftDarknet(cfg, arc=config['arc']).to(device)

optimizer = create_optimizer(model, config)
start_epoch = 0
best_fitness = 0.0
start_iteration, start_epoch, best_fitness, model, _, optimizer = load_checkpoints_mask(
    config, model, None, optimizer, device, attempt_download, load_darknet_weights
)

if config['xavier_norm']:
    initialize_model(model, torch.nn.init.xavier_normal_)
elif config['xavier_uniform']:
    initialize_model(model, torch.nn.init.xavier_uniform_)
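# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): the checkpoint loaders
# above and in the distillation scripts work with lottery-ticket style masks
# (create_mask_LTH / apply_mask_LTH). The helpers below show one plausible
# implementation -- a binary mask per weight tensor multiplied into the
# parameters -- and are assumptions, not the repository's actual code.
# ---------------------------------------------------------------------------
import torch
import torch.nn as nn


def create_weight_masks(model: nn.Module):
    """One all-ones mask per 'weight' parameter, mirroring the model's shapes."""
    return {name: torch.ones_like(p) for name, p in model.named_parameters() if name.endswith('weight')}


def apply_weight_masks(model: nn.Module, masks: dict):
    """Zero out pruned weights in-place by element-wise multiplication."""
    with torch.no_grad():
        for name, p in model.named_parameters():
            if name in masks:
                p.mul_(masks[name].to(p.device))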
def train():
    cfg = config['cfg']
    data = config['data']
    img_size, img_size_test = config['img_size'] if len(config['img_size']) == 2 else config['img_size'] * 2  # train, test sizes
    epochs = config['epochs']  # 500200 batches at bs 64, 117263 images = 273 epochs
    batch_size = config['batch_size']
    accumulate = config['accumulate']  # effective bs = batch_size * accumulate = 16 * 4 = 64

    # Initialize
    init_seeds(config['seed'])
    if config['multi_scale']:
        img_sz_min = round(img_size / 32 / 1.5)
        img_sz_max = round(img_size / 32 * 1.5)
        img_size = img_sz_max * 32  # initiate with maximum multi_scale size
        print('Using multi-scale %g - %g' % (img_sz_min * 32, img_size))

    # Configure run
    data_dict = parse_data_cfg(data)
    nc = 1 if config['single_cls'] else int(data_dict['classes'])  # number of classes

    # Initialize model
    if config['darknet'] == 'default':
        if 'nano' in cfg:
            model = YOLO_Nano(cfg).to(device)
        else:
            model = Darknet(cfg, arc=config['arc']).to(device)
    elif config['darknet'] == 'soft':
        model = SoftDarknet(cfg, arc=config['arc']).to(device)

    optimizer = create_optimizer(model, config)
    start_epoch, best_fitness, model, optimizer = load_checkpoints(
        config, model, optimizer, device, attempt_download, load_darknet_weights
    )

    if config['xavier_norm']:
        initialize_model(model, torch.nn.init.xavier_normal_)
    elif config['xavier_uniform']:
        initialize_model(model, torch.nn.init.xavier_uniform_)

    scheduler = create_scheduler(config, optimizer, start_epoch)

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

    # # Plot lr schedule
    # y = []
    # for _ in range(epochs):
    #     scheduler.step()
    #     y.append(optimizer.param_groups[0]['lr'])
    # plt.plot(y, '.-', label='LambdaLR')
    # plt.xlabel('epoch')
    # plt.ylabel('LR')
    # plt.tight_layout()
    # plt.savefig('LR.png', dpi=300)

    # Initialize distributed training
    if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available():
        dist.init_process_group(backend='nccl',  # 'distributed backend'
                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
                                world_size=1,  # number of nodes for distributed training
                                rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

    trainloader, validloader = create_dataloaders(config)

    # Start training
    nb = len(trainloader)
    prebias = start_epoch == 0
    model.nc = nc  # attach number of classes to model
    model.arc = config['arc']  # attach yolo architecture
    model.hyp = config['hyp']  # attach hyperparameters to model
    model.class_weights = labels_to_class_weights(trainloader.dataset.labels, nc).to(device)  # attach class weights
    maps = np.zeros(nc)  # mAP per class
    # torch.autograd.set_detect_anomaly(True)
    results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    t0 = time.time()
    torch_utils.model_info(model, report='summary')  # 'full' or 'summary'
    print('Starting training for %g epochs...' % epochs)

    max_wo_best = 0
    ###############
    # Start epoch #
    ###############
    for epoch in range(start_epoch, epochs):
        model.train()
        model.gr = 1 - (1 + math.cos(min(epoch * 2, epochs) * math.pi / epochs)) / 2  # GIoU <-> 1.0 loss ratio

        # Prebias
        if prebias:
            ne = max(round(30 / nb), 3)  # number of prebias epochs
            ps = np.interp(epoch, [0, ne], [0.1, config['hyp']['lr0'] * 2]), \
                 np.interp(epoch, [0, ne], [0.9, config['hyp']['momentum']])  # prebias settings (lr=0.1, momentum=0.9)
            if epoch == ne:
                print_model_biases(model)
                prebias = False

            # Bias optimizer settings
            optimizer.param_groups[2]['lr'] = ps[0]
            if optimizer.param_groups[2].get('momentum') is not None:  # for SGD but not Adam
                optimizer.param_groups[2]['momentum'] = ps[1]

        # Update image weights (optional)
        if trainloader.dataset.image_weights:
            w = model.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
            image_weights = labels_to_image_weights(trainloader.dataset.labels, nc=nc, class_weights=w)
            trainloader.dataset.indices = random.choices(range(trainloader.dataset.n),
                                                         weights=image_weights,
                                                         k=trainloader.dataset.n)  # rand weighted idx

        mloss = torch.zeros(4).to(device)  # mean losses
        print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
        pbar = tqdm(enumerate(trainloader), total=nb)  # progress bar
        ####################
        # Start mini-batch #
        ####################
        for i, (imgs, targets, paths, _) in pbar:  # for i, (imgs, targets, paths, _) in enumerate(trainloader):
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            # Plot images with bounding boxes
            if ni < 1:
                f = config['sub_working_dir'] + 'train_batch%g.png' % i  # filename
                plot_images(imgs=imgs, targets=targets, paths=paths, fname=f)
                if tb_writer:
                    tb_writer.add_image(f, cv2.imread(f)[:, :, ::-1], dataformats='HWC')

            # Multi-Scale training
            if config['multi_scale']:
                if ni / accumulate % 1 == 0:  # adjust img_size (67% - 150%) every 1 batch
                    img_size = random.randrange(img_sz_min, img_sz_max + 1) * 32
                sf = img_size / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / 32.) * 32 for x in imgs.shape[2:]]  # new shape (stretched to 32-multiple)
                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Run model
            pred = model(imgs)

            # Compute loss
            loss, loss_items = compute_loss(pred, targets, model)
            if not torch.isfinite(loss):
                print('WARNING: non-finite loss, ending training ', loss_items)
                return results

            # Scale loss by nominal batch_size of 64
            loss *= batch_size / 64

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Optimize accumulated gradient
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
            s = ('%10s' * 2 + '%10.3g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, len(targets), img_size)
            pbar.set_description(s)
            ##################
            # End mini-batch #
            ##################

        # Update scheduler
        scheduler.step()

        final_epoch = epoch + 1 == epochs
        if not config['notest'] or final_epoch:  # Calculate mAP
            is_coco = any([x in data for x in ['coco.data', 'coco2014.data', 'coco2017.data']]) and model.nc == 80
            results, maps = test.test(cfg=cfg,
                                      data=data,
                                      batch_size=batch_size,
                                      img_size=img_size_test,
                                      model=model,
                                      conf_thres=0.001,  # 0.001 if opt.evolve or (final_epoch and is_coco) else 0.01
                                      iou_thres=0.6,
                                      save_json=final_epoch and is_coco,
                                      single_cls=config['single_cls'],
                                      dataloader=validloader,
                                      folder=config['sub_working_dir'])

        # Write epoch results
        with open(config['results_file'], 'a') as f:
            f.write(s + '%10.3g' * 7 % results + '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
        if len(config['name']) and config['bucket']:
            os.system('gsutil cp results.txt gs://%s/results/results%s.txt' % (config['bucket'], config['name']))

        # Write Tensorboard results
        if tb_writer:
            x = list(mloss) + list(results)
            titles = ['GIoU', 'Objectness', 'Classification', 'Train loss',
                      'Precision', 'Recall', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification']
            for xi, title in zip(x, titles):
                tb_writer.add_scalar(title, xi, epoch)

        # Update best mAP
        fi = fitness(np.array(results).reshape(1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
        if fi > best_fitness:
            best_fitness = fi
            max_wo_best = 0
        else:
            max_wo_best += 1
            if config['early_stop'] and max_wo_best == config['early_stop']:
                print('Ending training due to early stop')

        # Save training results
        save = (not config['nosave']) or (final_epoch and not config['evolve'])
        if save:
            with open(config['results_file'], 'r') as f:
                # Create checkpoint
                chkpt = {'epoch': epoch,
                         'best_fitness': best_fitness,
                         'training_results': f.read(),
                         'model': model.module.state_dict() if type(model) is nn.parallel.DistributedDataParallel
                                  else model.state_dict(),
                         'optimizer': None if final_epoch else optimizer.state_dict()}

            # Save last checkpoint
            torch.save(chkpt, config['last'])

            # Save best checkpoint
            if best_fitness == fi:
                torch.save(chkpt, config['best'])

            # Save backup every 10 epochs (optional)
            # if epoch > 0 and epoch % 10 == 0:
            #     torch.save(chkpt, config['sub_working_dir'] + 'backup%g.pt' % epoch)

            # Delete checkpoint
            del chkpt
            torch.cuda.empty_cache()

        if config['early_stop'] and max_wo_best == config['early_stop']:
            break
        #############
        # End epoch #
        #############

    n = config['name']
    if len(n):
        n = '_' + n if not n.isnumeric() else n
        fresults, flast, fbest = 'results%s.txt' % n, 'last%s.pt' % n, 'best%s.pt' % n
        os.rename(config['results_file'], config['sub_working_dir'] + fresults)
        os.rename(config['last'], config['sub_working_dir'] + flast) if os.path.exists(config['last']) else None
        os.rename(config['best'], config['sub_working_dir'] + fbest) if os.path.exists(config['best']) else None
        # Updating results, last and best
        config['results_file'] = config['sub_working_dir'] + fresults
        config['last'] = config['sub_working_dir'] + flast
        config['best'] = config['sub_working_dir'] + fbest

        if config['bucket']:  # save to cloud
            os.system('gsutil cp %s gs://%s/results' % (fresults, config['bucket']))
            os.system('gsutil cp %s gs://%s/weights' % (config['sub_working_dir'] + flast, config['bucket']))
            # os.system('gsutil cp %s gs://%s/weights' % (config['sub_working_dir'] + fbest, config['bucket']))

    if not config['evolve']:
        plot_results(folder=config['sub_working_dir'])

    print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
    dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
    torch.cuda.empty_cache()
    return results
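# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): all three training loops
# above share the same accumulation pattern -- each loss is scaled by
# batch_size / 64 so gradients match a nominal batch size of 64, and the
# optimizer only steps every `accumulate` mini-batches. A minimal standalone
# version of that pattern, with a toy model and loss, is shown below.
# ---------------------------------------------------------------------------
import torch
import torch.nn as nn


def accumulation_demo(batch_size=16, accumulate=4, steps=8):
    model = nn.Linear(10, 1)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    criterion = nn.MSELoss()
    for ni in range(steps):
        x = torch.randn(batch_size, 10)
        y = torch.randn(batch_size, 1)
        loss = criterion(model(x), y)
        loss *= batch_size / 64       # scale to the nominal batch size of 64
        loss.backward()               # gradients accumulate across mini-batches
        if ni % accumulate == 0:      # effective batch = batch_size * accumulate
            optimizer.step()
            optimizer.zero_grad()


if __name__ == '__main__':
    accumulation_demo()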