def __init__(self, ops, device):
    """Build a YOLOv3 / YOLOv3-tiny detector from an options namespace.

    Args:
        ops: parsed options; uses img_size, data_cfg, detect_network,
             detect_model (path to the .pt weight file).
        device: torch device the model is loaded onto.

    Raises:
        FileNotFoundError: if the weight file ``ops.detect_model`` does not exist.
    """
    self.ops = ops
    self.img_size = ops.img_size
    self.classes = load_classes(parse_data_cfg(ops.data_cfg)['names'])
    self.num_classes = len(self.classes)

    # Anchors were tuned for a 416x416 input; rescale them to the configured size.
    a_scalse = 416. / ops.img_size
    if "tiny" in ops.detect_network:
        anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169), (344, 319)]
        anchors_new = [(int(w / a_scalse), int(h / a_scalse)) for (w, h) in anchors]
        model = Yolov3Tiny(self.num_classes, anchors=anchors_new)
        print('network : yolov3 - tiny')
    else:
        anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119),
                   (116, 90), (156, 198), (373, 326)]
        anchors_new = [(int(w / a_scalse), int(h / a_scalse)) for (w, h) in anchors]
        model = Yolov3(self.num_classes, anchors=anchors_new)
        print('network : yolov3')
    weights = ops.detect_model

    self.model = model
    yolo_model_param(self.model)  # print model parameter summary
    self.device = device
    self.use_cuda = torch.cuda.is_available()

    # Load weights.
    # BUG FIX: the original did `return False` here — returning a non-None
    # value from __init__ raises TypeError at instantiation. Raise a clear,
    # catchable exception instead.
    if not os.access(weights, os.F_OK):  # check that the weight file exists
        print('------- >>> error model not exists')
        raise FileNotFoundError(weights)
    self.model.load_state_dict(
        torch.load(weights, map_location=self.device)['model'])
    self.model.to(self.device).eval()  # inference mode
def main():
    """Benchmark ten iterations of prune_on_cpu on a single training batch."""
    cli_args = create_prune_argparser()
    config = create_config(cli_args)

    # Reproducibility, then model / mask / backup setup.
    init_seeds(seed=0)
    model = Darknet(cfg=config['cfg'], arc=config['arc'])
    mask = create_mask(model)
    bckp = create_backup(model)
    device = select_device(config['device'])
    model = model.to(device)

    # Build the training dataset described by the data config.
    train_path = parse_data_cfg(config['data'])['train']
    dataset = LoadImagesAndLabels(
        path=train_path,
        img_size=config['img_size'][0],
        batch_size=config['batch_size'],
        augment=True,
        hyp=config['hyp'],
        cache_images=config['cache_images'],
    )

    # Dataloader with a fixed batch of 18 and a capped worker count.
    nw = min([os.cpu_count(), 18 if 18 > 1 else 0, 8])
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=18,
                                             num_workers=nw,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    # A single normalized batch is enough for the timing run.
    imgs, _, _, _ = next(iter(dataloader))
    imgs = (imgs.float() / 255.0).to(device)

    start = datetime.datetime.now()
    print(f'Starting to compute the time at {start}')
    for _ in range(10):
        prune_on_cpu(model, mask, bckp, imgs, config, device)
    end = datetime.datetime.now()
    print(f'Ending at {end}')
    result = end - start
    print(f'Time of {result}')
def run_compare(cfg, data, prune_cfg, batch_size, origin_weights):
    """Run random-greedy channel selection over every prunable layer but the first."""
    device = select_device('', apex=None, batch_size=batch_size)

    # Single-node distributed init, needed when more than one GPU is visible.
    if device.type != 'cpu' and torch.cuda.device_count() > 1:
        dist.init_process_group(backend='nccl',
                                init_method='tcp://127.0.0.1:9999',
                                world_size=1,
                                rank=0)
    init_seeds()

    # NOTE: despite the variable name, the loader is built from the 'valid' split.
    val_path = parse_data_cfg(data)['valid']
    dataset = LoadImagesAndLabels(val_path,
                                  416,
                                  batch_size,
                                  augment=True,
                                  hyp=hyp,  # augmentation hyperparameters
                                  rect=False,
                                  cache_labels=True,
                                  cache_images=False)
    batch_size = min(batch_size, len(dataset))
    workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=batch_size,
                                               num_workers=workers,
                                               shuffle=True,
                                               pin_memory=True,
                                               collate_fn=dataset.collate_fn)

    # Restore the unpruned reference model.
    origin_model = Darknet(cfg).to(device)
    chkpt = torch.load(origin_weights, map_location=device)
    origin_model.load_state_dict(chkpt['model'], strict=True)
    aux_util = AuxNetUtils(origin_model, hyp)
    del chkpt

    for layer in aux_util.pruning_layer[1:]:
        random_greedy_channel_select(origin_model, prune_cfg, origin_weights,
                                     layer, device, aux_util, train_loader, 0.75)
def run_single_detect(model, images, img_size, conf_thres=0.3, nms_thres=0.45):
    """Run detection on each image and print per-class detection counts."""
    device = torch_utils.select_device()
    loader = LoadImages(images, img_size=img_size)
    classes = load_classes(parse_data_cfg('cfg/coco.data')['names'])

    for path, img, im0 in loader:
        img = torch.from_numpy(img).unsqueeze(0).to(device)
        pred = model(img)

        # Keep only boxes whose objectness score exceeds the threshold.
        pred = pred[pred[:, :, 4] > conf_thres]
        if not len(pred):
            continue

        # Run NMS on the surviving predictions.
        detections = non_max_suppression(pred.unsqueeze(0), conf_thres, nms_thres)[0]

        # Print how many boxes of each class were found.
        for c in detections[:, -1].cpu().unique():
            n = (detections[:, -1].cpu() == c).sum()
            print('%g %ss' % (n, classes[int(c)]), end=', ')
def test(cfg,
         data,
         batch_size,
         img_size,
         conf_thres,
         iou_thres,
         nms_thres,
         src_txt_path,
         weights,
         log_file_path=None,
         model=None):
    """Evaluate a Darknet model on a VOC-style dataset and print P/R/mAP/F1.

    When `model` is None a Darknet model is built from `cfg` and loaded from
    `weights`; otherwise the passed-in model is evaluated on its own device.
    Returns ((mp, mr, map, mf1), maps) where `maps` is the per-class AP array.
    """
    # 0. Initialize some parameters
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    names = load_classes(data['names'])

    # 1. Load the network
    if model is None:
        device = select_device('0')
        model = Darknet(cfg)
        if weights.endswith('.pt'):  # TODO: support .weights (darknet) format
            model.load_state_dict(
                torch.load(weights, map_location=device)['model']
            )  # 20200704_50epoch_modify_noobj  # TODO: map_location=device ?
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)  # clw note: multi-GPU
    else:
        device = next(model.parameters()).device  # get model device
    model.to(device).eval()

    # 2. Load the dataset
    test_dataset = VocDataset(src_txt_path, img_size, with_label=True, is_training=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn,  # TODO
        pin_memory=True)

    # 3. Predict — forward pass
    image_nums = 0
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@{}'.format(iou_thres), 'F1')
    #s = ('%20s' + '%10s' * 6) % ('Class', 'ImgNum', 'Target', 'P', 'R', '[email protected]', 'F1')
    p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []
    pbar = tqdm(dataloader)
    for i, (img_tensor, target_tensor, _, _) in enumerate(pbar):
        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)
        target_tensor = target_tensor.to(device)
        height, width = img_tensor.shape[2:]
        start = time.time()
        # Disable gradients
        with torch.no_grad():
            # (1) Run model
            output = model(
                img_tensor
            )  # (x1, y1, x2, y2, obj_conf, class_conf, class_pred)
            # (2) NMS
            nms_output = non_max_suppression(output, conf_thres, nms_thres)
            s = 'time use per batch: %.3fs' % (time.time() - start)
            pbar.set_description(s)
        for batch_idx, pred in enumerate(nms_output):  # pred: (bs, 7)
            labels = target_tensor[target_tensor[:, 0] == batch_idx, 1:]
            nl = len(labels)  # len of label
            tcls = labels[:, 0].tolist() if nl else []  # target class
            image_nums += 1

            # Handle the case of no predicted box at all (e.g. conf threshold too high)
            if pred is None:
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Clip boxes to image bounds. TODO: necessary — labels are clipped,
            # so removing this clip would slightly lower mAP.
            clip_coords(pred, (height, width))  # mAP is the same

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes (normalized xywh -> absolute xyxy)
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= img_tensor[batch_idx].size()[2]  # w
                tbox[:, [1, 3]] *= img_tensor[batch_idx].size()[1]  # h

                # Search for correct predictions
                # NOTE: this inner `i` shadows the outer batch index, which is
                # unused afterwards, so behavior is unaffected.
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):
                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break
                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue
                    # Best iou, index between pred and targets of the same class
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)
                    # If iou > threshold and target not yet matched, mark correct
                    if iou > iou_thres and m[bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])

            # print('stats.append: ', (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))
            '''
            example of one appended tuple:
              pred flag  ( [1, 0, 1, 0, 0, 1, 0, 0, 1],
              pred conf  tensor([0.17245, 0.14642, 0.07215, 0.07138, 0.07069,
                                 0.06449, 0.06222, 0.05580, 0.05452]),
              pred cls   tensor([2., 2., 2., 2., 2., 2., 2., 2., 2.]),
              lb_cls     [2.0, 2.0, 2.0, 2.0, 2.0])
            stats is a []
            '''
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))  # Append statistics (correct, conf, pcls, tcls)

    # after get stats for all images , ...
    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    # time.sleep(0.01)  # clw note: guard against tqdm output racing this print
    #pf = '%20s' + '%10.3g' * 6  # print format
    pf = '%20s' + '%10s' + '%10.3g' * 5
    pf_value = pf % ('all', str(image_nums), nt.sum(), mp, mr, map, mf1)
    print(pf_value)
    if __name__ != '__main__':
        write_to_file(s, log_file_path)
        write_to_file(pf_value, log_file_path)

    results = []
    results.append({"all": (mp, mr, map, mf1)})

    # Print results per class
    #if verbose and nc > 1 and len(stats):
    if nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            #print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))
            print(pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]))
            if __name__ != '__main__':
                write_to_file(
                    pf % (names[c], '', nt[c], p[i], r[i], ap[i], f1[i]),
                    log_file_path)
            results.append({names[c]: (p[i], r[i], ap[i], f1[i])})

    # Return results
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1), maps
def train():
    """Train a Darknet/YOLOv3 model (ultralytics-style loop).

    Reads configuration from the module-level `opt` namespace and `hyp` dict;
    also relies on module-level `device`, `mixed_precision`, `tb_writer`,
    `results_file`, `wdir`, `last` and `best`. Returns the final `results`
    tuple (P, R, mAP, F1, val GIoU, val Objectness, val Classification).
    """
    cfg = opt.cfg
    data = opt.data
    epochs = opt.epochs  # 500200 batches at bs 64, 117263 images = 273 epochs
    batch_size = opt.batch_size
    accumulate = max(round(64 / batch_size), 1)  # accumulate n times before optimizer update (bs 64)
    weights = opt.weights  # initial training weights
    imgsz_min, imgsz_max, imgsz_test = opt.img_size  # img sizes (min, max, test)

    # Image Sizes
    gs = 64  # (pixels) grid size
    assert math.fmod(
        imgsz_min, gs) == 0, '--img-size %g must be a %g-multiple' % (imgsz_min, gs)
    opt.multi_scale |= imgsz_min != imgsz_max  # multi if different (min, max)
    if opt.multi_scale:
        if imgsz_min == imgsz_max:
            # NOTE(review): `//=` with a float yields a float here; it is
            # re-snapped to a gs multiple below, but grid_min/grid_max become
            # floats — confirm random.randrange tolerates that on this Python.
            imgsz_min //= 1.5
            imgsz_max //= 0.667
        grid_min, grid_max = imgsz_min // gs, imgsz_max // gs
        imgsz_min, imgsz_max = grid_min * gs, grid_max * gs
    img_size = imgsz_max  # initialize with max size

    # Configure run
    init_seeds()
    data_dict = parse_data_cfg(data)
    train_path = data_dict['train']
    test_path = data_dict['valid']
    nc = 1 if opt.single_cls else int(
        data_dict['classes'])  # number of classes
    hyp['cls'] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset

    # Remove previous results
    for f in glob.glob('*_batch*.jpg') + glob.glob(results_file):
        os.remove(f)

    # Initialize model
    model = Darknet(cfg).to(device)

    # Optimizer: separate parameter groups so only conv weights get weight decay
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if '.bias' in k:
            pg2 += [v]  # biases
        elif 'Conv2d.weight' in k:
            pg1 += [v]  # apply weight_decay
        else:
            pg0 += [v]  # all else

    if opt.adam:
        # hyp['lr0'] *= 0.1  # reduce lr (i.e. SGD=5E-3, Adam=5E-4)
        optimizer = optim.Adam(pg0, lr=hyp['lr0'])
        # optimizer = AdaBound(pg0, lr=hyp['lr0'], final_lr=0.1)
    else:
        optimizer = optim.SGD(pg0,
                              lr=hyp['lr0'],
                              momentum=hyp['momentum'],
                              nesterov=True)
    optimizer.add_param_group({
        'params': pg1,
        'weight_decay': hyp['weight_decay']
    })  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    del pg0, pg1, pg2

    start_epoch = 0
    best_fitness = 0.0
    # attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        chkpt = torch.load(weights, map_location=device)

        # load model: keep only tensors whose shapes match the current model
        try:
            chkpt['model'] = {
                k: v
                for k, v in chkpt['model'].items()
                if model.state_dict()[k].numel() == v.numel()
            }
            model.load_state_dict(chkpt['model'], strict=False)
        except KeyError as e:
            s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
                "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights)
            raise KeyError(s) from e

        # load optimizer
        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])
            best_fitness = chkpt['best_fitness']

        # load results
        if chkpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(chkpt['training_results'])  # write results.txt

        start_epoch = chkpt['epoch'] + 1
        del chkpt
    elif len(weights) > 0:  # darknet format
        load_darknet_weights(model, weights)

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level='O1',
                                          verbosity=0)

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((
        (1 + math.cos(x * math.pi / epochs)) / 2)**1.0) * 0.95 + 0.05  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    scheduler.last_epoch = start_epoch - 1  # see link below
    # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822

    # Plot lr schedule
    # y = []
    # for _ in range(epochs):
    #     scheduler.step()
    #     y.append(optimizer.param_groups[0]['lr'])
    # plt.plot(y, '.-', label='LambdaLR')
    # plt.xlabel('epoch')
    # plt.ylabel('LR')
    # plt.tight_layout()
    # plt.savefig('LR.png', dpi=300)

    # Initialize distributed training
    if device.type != 'cpu' and torch.cuda.device_count(
    ) > 1 and torch.distributed.is_available():
        dist.init_process_group(
            backend='nccl',  # 'distributed backend'
            init_method=
            'tcp://127.0.0.1:9999',  # distributed training init method
            world_size=1,  # number of nodes for distributed training
            rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(
            model, find_unused_parameters=True)
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

    # Dataset
    dataset = LoadImagesAndLabels(
        train_path,
        img_size,
        batch_size,
        augment=True,
        hyp=hyp,  # augmentation hyperparameters
        rect=opt.rect,  # rectangular training
        cache_images=opt.cache_images,
        single_cls=opt.single_cls)

    # Dataloader
    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=nw,
        shuffle=not opt.rect,  # Shuffle=True unless rectangular training is used
        pin_memory=True,
        collate_fn=dataset.collate_fn)

    # Testloader
    testloader = torch.utils.data.DataLoader(LoadImagesAndLabels(
        test_path,
        imgsz_test,
        batch_size,
        hyp=hyp,
        rect=True,
        cache_images=opt.cache_images,
        single_cls=opt.single_cls),
                                             batch_size=batch_size,
                                             num_workers=nw,
                                             pin_memory=True,
                                             collate_fn=dataset.collate_fn)

    # Model parameters
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(
        device)  # attach class weights

    # Model EMA
    ema = torch_utils.ModelEMA(model)

    # Start training
    nb = len(dataloader)  # number of batches
    n_burn = max(3 * nb,
                 500)  # burn-in iterations, max(3 epochs, 500 iterations)
    maps = np.zeros(nc)  # mAP per class
    # torch.autograd.set_detect_anomaly(True)
    results = (
        0, 0, 0, 0, 0, 0, 0
    )  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    t0 = time.time()
    print('Image sizes %g - %g train, %g test' %
          (imgsz_min, imgsz_max, imgsz_test))
    print('Using %g dataloader workers' % nw)
    print('Starting training for %g epochs...' % epochs)
    print("------------------------------------------------", start_epoch,
          start_epoch + epochs)
    for epoch in range(
            start_epoch, start_epoch + epochs
    ):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if dataset.image_weights:
            w = model.class_weights.cpu().numpy() * (1 -
                                                     maps)**2  # class weights
            image_weights = labels_to_image_weights(dataset.labels,
                                                    nc=nc,
                                                    class_weights=w)
            dataset.indices = random.choices(range(dataset.n),
                                             weights=image_weights,
                                             k=dataset.n)  # rand weighted idx

        mloss = torch.zeros(4).to(device)  # mean losses
        # print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
        # pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
        for i, (imgs, targets, paths, _) in enumerate(
                dataloader
        ):  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device).float(
            ) / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            targets = targets.to(device)

            # Burn-in
            if ni <= n_burn * 2:
                model.gr = np.interp(
                    ni, [0, n_burn * 2],
                    [0.0, 1.0])  # giou loss ratio (obj_loss = 1.0 or giou)
                if ni == n_burn:  # burnin complete
                    print_model_biases(model)

                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(
                        ni, [0, n_burn],
                        [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, [0, n_burn],
                                                  [0.9, hyp['momentum']])

            # Multi-Scale
            if opt.multi_scale:
                if ni / accumulate % 1 == 0:  # adjust img_size (67% - 150%) every 1 batch
                    img_size = random.randrange(grid_min, grid_max + 1) * gs
                sf = img_size / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [
                        math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]
                    ]  # new shape (stretched to 32-multiple)
                    imgs = F.interpolate(imgs,
                                         size=ns,
                                         mode='bilinear',
                                         align_corners=False)

            # Forward
            pred = model(imgs)

            # Loss
            loss, loss_items = compute_loss(pred, targets, model)
            if not torch.isfinite(loss):
                print('WARNING: non-finite loss, ending training ', loss_items)
                return results

            # Backward
            loss *= batch_size / 64  # scale loss
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Optimize (gradient accumulation: step every `accumulate` batches)
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()
                ema.update(model)

            # Print
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = '%.3gG' % (torch.cuda.memory_cached() / 1E9
                             if torch.cuda.is_available() else 0)  # (GB)
            s = ('%10s' * 2 + '%10.3g' * 6) % ('%g/%g' %
                                               (epoch, epochs - 1), mem,
                                               *mloss, len(targets), img_size)
            # pbar.set_description(s)

            # Plot
            # if ni < 1:
            #     f = 'train_batch%g.jpg' % i  # filename
            #     # plot_images(imgs=imgs, targets=targets, paths=paths, fname=f)
            #     if tb_writer:
            #         tb_writer.add_image(f, cv2.imread(f)[:, :, ::-1], dataformats='HWC')
            #         tb_writer.add_graph(model, imgs)  # add model to tensorboard

            # end batch ------------------------------------------------------------------------------------------------

        # Update scheduler
        scheduler.step()

        # Process epoch results
        ema.update_attr(model)
        final_epoch = epoch + 1 == epochs
        if not opt.notest or final_epoch:  # Calculate mAP
            is_coco = any([
                x in data
                for x in ['coco.data', 'coco2014.data', 'coco2017.data']
            ]) and model.nc == 80
            results, maps = test.test(cfg,
                                      data,
                                      batch_size=batch_size,
                                      img_size=imgsz_test,
                                      model=ema.ema,
                                      save_json=final_epoch and is_coco,
                                      single_cls=opt.single_cls,
                                      dataloader=testloader)

        # Write
        with open(results_file, 'a') as f:
            f.write(s + '%10.3g' * 7 % results +
                    '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
        if len(opt.name) and opt.bucket:
            os.system('gsutil cp results.txt gs://%s/results/results%s.txt' %
                      (opt.bucket, opt.name))

        # Tensorboard
        if tb_writer:
            tags = [
                'train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5',
                'metrics/F1', 'val/giou_loss', 'val/obj_loss', 'val/cls_loss'
            ]
            for x, tag in zip(list(mloss[:-1]) + list(results), tags):
                tb_writer.add_scalar(tag, x, epoch)

        # Update best mAP
        fi = fitness(np.array(results).reshape(
            1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
        if fi > best_fitness:
            best_fitness = fi

        # Save model
        save = (not opt.nosave) or (final_epoch and not opt.evolve)
        if save:
            with open(results_file, 'r') as f:
                # create checkpoint
                chkpt = {
                    'epoch':
                    epoch,
                    'best_fitness':
                    best_fitness,
                    'training_results':
                    f.read(),
                    'model':
                    ema.ema.module.state_dict()
                    if hasattr(model, 'module') else ema.ema.state_dict(),
                    'optimizer':
                    None if final_epoch else optimizer.state_dict()
                }

            # Save last, best and delete
            torch.save(chkpt, last)
            if (best_fitness == fi) and not final_epoch:
                torch.save(chkpt, best)
            del chkpt

        # end epoch ----------------------------------------------------------------------------------------------------

    # end training: optionally rename outputs with the run name and upload
    n = opt.name
    if len(n):
        n = '_' + n if not n.isnumeric() else n
        fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
        for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'],
                          [flast, fbest, fresults]):
            if os.path.exists(f1):
                os.rename(f1, f2)  # rename
                ispt = f2.endswith('.pt')  # is *.pt
                strip_optimizer(f2) if ispt else None  # strip optimizer
                os.system('gsutil cp %s gs://%s/weights' %
                          (f2, opt.bucket)) if opt.bucket and ispt else None  # upload

    if not opt.evolve:
        plot_results()  # save as results.png
    print('%g epochs completed in %.3f hours.\n' %
          (epoch - start_epoch + 1, (time.time() - t0) / 3600))
    dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
    torch.cuda.empty_cache()

    return results
def test(cfg,
         data_cfg,
         batch_size=16,
         img_size=416,
         iou_thres=0.5,
         conf_thres=0.3,
         nms_thres=0.5,
         model=None):
    """Evaluate a Yolov3/Yolov3Tiny model on the 'valid' split of `data_cfg`.

    When `model` is None a model is built from `cfg` and hard-coded weight
    paths are loaded; otherwise the passed-in model is evaluated as-is.
    Returns (mp, mr, map, mf1, loss).
    """
    # Configure run
    data_cfg = parse_data_cfg(data_cfg)
    nc = int(data_cfg['classes'])  # number of classes
    test_path = data_cfg['valid']  # path to test images
    names = load_classes(data_cfg['names'])  # class names

    if model is None:
        device = select_device()
        num_classes = nc

        # Initialize model
        if "-tiny" in cfg:
            model = Yolov3Tiny(num_classes).to(device)
            # weights = 'weights-yolov3-tiny/best.pt'
            weights = "./yolov3-tiny_coco.pt"
        else:
            model = Yolov3(num_classes).to(device)
            # weights = 'weights-yolov3/best.pt'
            weights = "./finetune-weight/yolov3_coco.pt"

        # Load weights
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device  # get model device
    print("using device: {}".format(device))

    # Dataloader
    dataset = LoadImagesAndLabels(test_path, batch_size,
                                  img_size=img_size, augment=False)
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=0,
                            pin_memory=False,
                            collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    print(('%20s' + '%10s' * 6) %
          ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1'))
    loss, p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (imgs, targets, paths, shapes) in enumerate(
            tqdm(dataloader, desc='Computing mAP')):
        targets = targets.to(device)
        nt = len(targets)
        if nt == 0:  # if no targets continue
            continue
        imgs = imgs.to(device)

        # Run model
        inf_out, train_out = model(imgs)  # inference and training outputs

        # Build targets
        target_list = build_targets(model, targets)

        # Compute loss
        loss_i, _ = compute_loss(train_out, target_list)
        loss += loss_i.item()

        # Run NMS
        output = non_max_suppression(inf_out,
                                     conf_thres=conf_thres,
                                     nms_thres=nms_thres)

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            correct, detected = [], []
            tcls = torch.Tensor()
            seen += 1

            # No predictions for this image: record targets (if any) as misses
            if pred is None:
                if len(labels):
                    tcls = labels[:, 0].cpu()  # target classes
                    stats.append(
                        (correct, torch.Tensor(), torch.Tensor(), tcls))
                continue

            if len(labels):
                # Extract target boxes as (x1, y1, x2, y2)
                tbox = xywh2xyxy(labels[:, 1:5]) * img_size  # target boxes
                tcls = labels[:, 0]  # target classes

                for *pbox, pconf, pcls_conf, pcls in pred:
                    if pcls not in tcls:
                        correct.append(0)
                        continue
                    # Best iou, index between pred and targets
                    iou, bi = bbox_iou(pbox, tbox).max(0)
                    # If iou > threshold and target not yet matched, mark correct
                    if iou > iou_thres and bi not in detected:
                        correct.append(1)
                        detected.append(bi)
                    else:
                        correct.append(0)
            else:
                # If no labels add number of detections as incorrect
                correct.extend([0] * len(pred))

            # Append Statistics (correct, conf, pcls, tcls)
            stats.append(
                (correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls.cpu()))

    # Compute statistics.
    # BUG FIX: np.bincount(stats_np[3], ...) previously ran BEFORE the
    # emptiness check, so a run with zero recorded detections crashed with
    # IndexError. It is now guarded, matching the sibling test() implementation.
    stats_np = [np.concatenate(x, 0) for x in list(zip(*stats))]
    if len(stats_np):
        p, r, ap, f1, ap_class = ap_per_class(*stats_np)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats_np[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = np.zeros(nc, dtype=np.int64)  # no stats at all

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1), end='\n\n')

    # Print results per class
    if nc > 1 and len(stats_np):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Return results
    return mp, mr, map, mf1, loss
#parser.add_argument('--batch-size', type=int, default=16) opt = parser.parse_args() print(opt) device = select_device(opt.device) if device == 'cpu': mixed_precision = False # 0、Initialize parameters( set random seed, get cfg info, ) cfg = opt.cfg weights = opt.weights img_size = opt.img_size batch_size = opt.batch_size total_epochs = opt.epochs init_seeds() data = parse_data_cfg(opt.data) train_txt_path = data['train'] valid_txt_path = data['valid'] nc = int(data['classes']) # 0、打印配置文件信息,写log等 print('clw: config file:', cfg) print('clw: pretrained weights:', weights) # 1、加载模型 model = Darknet(cfg).to(device) #model.apply(weights_init_normal) # clw note: without this can also get high mAP; TODO if weights.endswith('.pt'): ### model.load_state_dict(torch.load(weights)['model']) # 错误原因:没有考虑类别对不上的那一层,也就是yolo_layer前一层
def get_thin_model(cfg, backbone, neck, data, origin_weights, img_size, batch_size, prune_rate, aux_epochs=50, ft_epochs=15, resume=False, cache_images=False, start_layer='75'): init_seeds() # -----------------dataset----------------- data_dict = parse_data_cfg(data) train_path = data_dict['train'] test_path = data_dict['valid'] dataset = LoadImagesAndLabels( train_path, img_size, batch_size, augment=True, hyp=hyp, # augmentation hyperparameters rect=False, # rectangular training cache_labels=True, cache_images=cache_images) batch_size = min(batch_size, len(dataset)) nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers train_loader = torch.utils.data.DataLoader( dataset, batch_size=batch_size, num_workers=nw, shuffle=True, # Shuffle=True unless rectangular training is used pin_memory=True, collate_fn=dataset.collate_fn) test_loader = torch.utils.data.DataLoader(LoadImagesAndLabels( test_path, img_size, batch_size * 2, hyp=hyp, rect=True, cache_labels=True, cache_images=cache_images), batch_size=batch_size * 2, num_workers=nw, pin_memory=True, collate_fn=dataset.collate_fn) # -----------------dataset----------------- # -----------get trained aux net----------- if aux_trained: aux_chkpt = torch.load(aux_weight) if aux_chkpt["epoch"] + 1 != aux_epochs: del aux_chkpt train_aux_for_DCP(cfg, backbone, neck, train_loader, origin_weights, aux_weight, hyp, device, resume=True, epochs=aux_epochs) else: del aux_chkpt else: train_aux_for_DCP(cfg, backbone, neck, train_loader, origin_weights, aux_weight, hyp, device, resume=False, epochs=aux_epochs) # -----------get trained aux net----------- # ----------init model and aux util---------- origin_model = Darknet(cfg).to(device) chkpt = torch.load(origin_weights, map_location=device) origin_model.load_state_dict(chkpt['model'], strict=True) aux_util = AuxNetUtils(origin_model, hyp, backbone, neck, strategy="DCP") del chkpt # ----------init model and aux net---------- mask_cfg, init_state_dict = 
mask_cfg_and_converted( aux_util.mask_replace_layer, cfg, origin_weights, target=None) # ----------start from first layer---------- if not resume: first_progress = { 'current_layer': start_layer, 'epoch': -1, 'model': init_state_dict, 'optimizer': None } aux_chkpt = torch.load(aux_weight) for k, v in aux_chkpt.items(): if 'aux' in k: first_progress[k] = v del aux_chkpt torch.save(first_progress, progress_chkpt) with open(progress_result, 'a') as f: t = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) f.write('\n' + t + '\n') # ----------start from first layer---------- layer = start_layer if start_layer == aux_util.pruning_layer[-1]: return mask_cfg, aux_util while int(layer) < int(aux_util.pruning_layer[-1]): layer = fine_tune(mask_cfg, data, aux_util, device, train_loader, test_loader, ft_epochs) channels_select(mask_cfg, data, origin_model, aux_util, device, train_loader, layer, prune_rate) return mask_cfg, aux_util
def train(
        cfg,
        data_cfg,
        resume=False,
        epochs=273,  # 500200 batches at bs 64, dataset length 117263
        batch_size=16,
        accumulate=1,
        weights_path='weights',
        init_weights='yolov3-player_stage2_start.81'):
    """Train a Darknet model with SGD, MultiStepLR and optional resume.

    Relies on module-level `hyp` and `opt` (backend/dist_url/world_size/rank,
    num_workers, notest, nosave). Returns the last `results` tuple
    (P, R, mAP, F1, test_loss).
    """
    #init_seeds()
    weights = weights_path + os.sep
    latest = weights + 'latest.pt'
    best = weights + 'best.pt'
    device, n_gpu = torch_utils.select_device()

    # Image size (taken from the [net] section of the darknet cfg)
    cfg_model = parse_cfg(cfg)
    img_size = (int(cfg_model[0]['width']), int(cfg_model[0]['height']))

    # Configure run
    train_path = parse_data_cfg(data_cfg)['train_path']
    train_set = parse_data_cfg(data_cfg)['train_set']

    # Initialize model
    model = Darknet(cfg).to(device)

    # Optimizer
    optimizer = optim.SGD(model.parameters(),
                          lr=hyp['lr0'],
                          momentum=hyp['momentum'],
                          weight_decay=hyp['weight_decay'])

    cutoff = -1  # backbone reaches to cutoff layer
    start_epoch = 0
    best_loss = float('inf')
    if resume:
        # Load previously saved model (resume from latest.pt)
        chkpt = torch.load(latest, map_location=device)  # load checkpoint
        model.load_state_dict(chkpt['model'])
        start_epoch = chkpt['epoch'] + 1
        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])
            best_loss = chkpt['best_loss']
        del chkpt
    else:
        # Initialize model with backbone (optional)
        model.load_weights(weights + init_weights)

    # Scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[100, 440, 1097],
                                               gamma=0.1,
                                               last_epoch=start_epoch - 1)

    # Dataset
    dataset = YoloDataSets(data_path=train_path,
                           input_size=img_size,
                           batch_size=batch_size,
                           image_set=train_set,
                           augment=True,
                           jitter_x=0.3,
                           jitter_y=0.3)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend=opt.backend,
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.rank)
        model = torch.nn.parallel.DistributedDataParallel(model)
        # sampler = torch.utils.data.distributed.DistributedSampler(dataset)

    # Dataloader
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=opt.num_workers,
        shuffle=False,  # disable rectangular training if True
        pin_memory=True,
        collate_fn=dataset.collate_fn)

    # Mixed precision training https://github.com/NVIDIA/apex
    # install help: https://github.com/NVIDIA/apex/issues/259
    mixed_precision = False
    if mixed_precision:
        from apex import amp
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1')

    # Start training
    t = time.time()
    model.hyp = hyp  # attach hyperparameters to model
    #model_info(model)
    nb = len(dataloader)
    results = (0, 0, 0, 0, 0)  # P, R, mAP, F1, test_loss
    n_burnin = int(cfg_model[0]["burn_in"])  # burn-in batches
    for epoch in range(start_epoch, epochs):
        model.train()
        print(('\n%8s%12s' + '%10s' * 7) %
              ('Epoch', 'Batch', 'xy', 'wh', 'conf', 'cls', 'total',
               'nTargets', 'time'))

        # Update scheduler
        scheduler.step(epoch)

        mloss = torch.zeros(5).to(device)  # mean losses
        for i, (imgs, targets) in enumerate(dataloader):
            imgs = imgs.to(device)
            targets = targets.to(device)
            nt = len(targets)
            #plot_images(imgs=imgs, targets=targets, fname='train_batch%d.jpg' % i)

            # SGD burn-in: ramp lr up over the first n_burnin batches of epoch 0
            if epoch == 0 and i <= n_burnin:
                lr = hyp['lr0'] * (i / n_burnin)**4
                for x in optimizer.param_groups:
                    x['lr'] = lr

            # NOTE(review): the mangled source does not show whether this
            # print was nested inside the burn-in branch; placed at loop level
            # (prints once per epoch) — confirm against the original file.
            if i == 0:
                print('learning rate: %g' % optimizer.param_groups[0]['lr'])

            # Run model
            pred, loss, loss_items = model(imgs, targets)
            loss = torch.mean(loss)
            n_ = int(loss_items.size()[0] / 5)
            loss_items = torch.mean(loss_items.view((n_, 5)), 0)
            if torch.isnan(loss):
                print('WARNING: nan loss detected, ending training')
                return results

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nb:
                optimizer.step()
                optimizer.zero_grad()

            # Update running mean of tracked metrics
            mloss = (mloss * i + loss_items) / (i + 1)

            # Print batch results
            s = ('%8s%12s' + '%10.3g' * 7) % ('%g/%g' % (epoch, epochs - 1),
                                              '%g/%g' % (i, nb - 1), *mloss,
                                              nt, time.time() - t)
            t = time.time()
            print(s)

        # Calculate mAP (always test final epoch, skip first 5 if opt.nosave)
        if not (opt.notest or (opt.nosave and epoch < 5)) or epoch == epochs - 1:
            with torch.no_grad():
                results, maps = test.test(cfg,
                                          data_cfg,
                                          batch_size=batch_size,
                                          img_size=img_size,
                                          model=model,
                                          conf_thres=0.1,
                                          iou_thres=0.4)

        # Write epoch results
        with open('results.txt', 'a') as file:
            file.write(s + '%11.3g' * 5 % results +
                       '\n')  # P, R, mAP, F1, test_loss

        # Update best loss
        test_loss = results[4]
        if test_loss < best_loss:
            best_loss = test_loss

        # Save training results
        save = True and not opt.nosave
        if save:
            # Create checkpoint
            chkpt = {
                'epoch':
                epoch,
                'best_loss':
                best_loss,
                'model':
                model.module.state_dict()
                if type(model) is nn.parallel.DistributedDataParallel else
                model.state_dict(),
                'optimizer':
                optimizer.state_dict()
            }

            # Save latest checkpoint
            torch.save(chkpt, latest)

            # Save best checkpoint
            if best_loss == test_loss:
                torch.save(chkpt, best)

            # Save backup every 10 epochs (optional)
            if epoch > 0 and epoch % 10 == 0:
                torch.save(chkpt, weights + 'backup%g.pt' % epoch)

            # Delete checkpoint
            del chkpt

    return results
def detect(
        model_path,
        classify_model_path,
        label_path,
        root_path,
        cfg,
        data_cfg,
        img_size=416,
        conf_thres=0.5,
        nms_thres=0.5,
):
    """Run YOLOv3 detection on every image under ``root_path``, then run a
    second-stage classifier on each detected box and display the results.

    Returns False when the detector weight file does not exist; otherwise
    loops over the images until ESC is pressed.
    """
    classes = load_classes(parse_data_cfg(data_cfg)['names'])
    num_classes = len(classes)
    # Initialize model: pick the tiny variant when the cfg name says so.
    if "-tiny" in cfg:
        model = Yolov3Tiny(num_classes)
        weights = model_path
    else:
        model = Yolov3(num_classes)
        weights = model_path
    show_model_param(model)  # print model parameters
    device = select_device(False)  # choose the inference device
    # Second-stage classifier plus its label list.
    classify_model, labels_dogs_list = Create_Classify_Model(
        device, classify_model_path, label_path)
    # Load weights
    if os.access(weights, os.F_OK):  # check the weight file exists
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
    else:
        print('error model not exists')
        return False
    model.to(device).eval()  # inference mode
    # One BGR color per class, spread across the palette.
    colors = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32)
              for v in range(1, num_classes + 1)][::-1]
    use_cuda = torch.cuda.is_available()
    for img_name in os.listdir(root_path):
        # NOTE(review): assumes root_path ends with a path separator — confirm.
        img_path = root_path + img_name
        im0 = cv2.imread(img_path)   # copy that gets drawn on
        im_c = cv2.imread(img_path)  # clean copy used for classifier crops
        print("---------------------")
        t = time.time()
        img = process_data(im0, img_size)
        if use_cuda:
            torch.cuda.synchronize()  # make the wall-clock timings meaningful
        t1 = time.time()
        print("process time:", t1 - t)
        img = torch.from_numpy(img).unsqueeze(0).to(device)
        pred, _ = model(img)
        if use_cuda:
            torch.cuda.synchronize()
        t2 = time.time()
        print("inference time:", t2 - t1)
        detections = non_max_suppression(pred, conf_thres, nms_thres)[0]
        if use_cuda:
            torch.cuda.synchronize()
        t3 = time.time()
        print("get res time:", t3 - t2)
        if detections is None or len(detections) == 0:
            continue
        # Rescale boxes from 416 to true image size
        detections[:, :4] = scale_coords(img_size, detections[:, :4],
                                         im0.shape).round()
        result = []
        for res in detections:
            result.append(
                (classes[int(res[-1])], float(res[4]),
                 [int(res[0]), int(res[1]), int(res[2]), int(res[3])]))
        if use_cuda:
            torch.cuda.synchronize()
        s2 = time.time()
        print("detect time:", s2 - t)
        print(result)
        # Draw bounding boxes and labels of detections
        for *xyxy, conf, cls_conf, cls in detections:
            label = '%s %.2f' % (classes[int(cls)], conf)
            #-------------------------------------------------------------------
            plot_one_box(xyxy, im0, label=label, color=colors[int(cls)])
            x_1 = int(xyxy[0])
            y_1 = int(xyxy[1])
            x_2 = int(xyxy[2])
            y_2 = int(xyxy[3])
            #-------------------- crop the detection and classify it
            img_crop_ = cv2.resize(im_c[y_1:y_2, x_1:x_2, :], (224, 224),
                                   interpolation=cv2.INTER_CUBIC)
            img_crop_ = img_crop_.astype(np.float32)
            img_crop_ = prewhiten(img_crop_)  # classifier-style normalization
            img_crop_ = torch.from_numpy(img_crop_)
            img_crop_ = img_crop_.unsqueeze_(0)
            img_crop_ = img_crop_.permute(0, 3, 1, 2)  # HWC -> NCHW
            if use_cuda:
                # img_crop_ = img_crop_.cuda()  # (bs, 3, h, w)
                # NOTE(review): as written, classification only runs when CUDA
                # is available; on CPU `outputs` would never be computed — confirm.
                outputs = F.softmax(classify_model(img_crop_.float()), dim=1)
                outputs = outputs[0]
                outputx = outputs.cpu().detach().numpy()
                # print('output: ',output)
                max_index = np.argmax(outputx)
                scorex_ = outputx[max_index]
                label_dog_ = labels_dogs_list[max_index]
                print('label_dog_ : ', label_dog_)
                plot_one_box((x_1, y_1 + 20, x_2, y_2), im0,
                             label=label_dog_ + '_' + '%.2f' % (scorex_),
                             color=colors[int(cls)])
            #----------------------- show the raw crop and the annotated frame
            cv2.namedWindow('crop', 0)
            cv2.imshow('crop', im_c[y_1:y_2, x_1:x_2, :])
            cv2.namedWindow('result', 0)
            cv2.imshow("result", im0)
            key = cv2.waitKey(0)
            if key == 27:  # ESC
                break
def train(hyper):
    """Train (or evaluate) a YOLO-SPP model using the global ``opt`` options.

    Args:
        hyper: dict of hyper-parameters (lr0, lrf, momentum, weight_decay,
            cls, obj, ...); 'cls' and 'obj' are rescaled in place below.
    """
    device = torch.device(opt.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    results_file = "results.txt"
    data = opt.data
    epochs = opt.epochs
    batch_size = opt.batch_size
    img_size_train = opt.img_size
    img_size_test = opt.img_size  # test image sizes
    multi_scale = opt.multi_scale

    # Image sizes
    # image sizes must be multiples of 32
    grid_size = 32  # (pixels) grid size
    assert math.fmod(img_size_test, grid_size) == 0, \
        "--img-size %g must be a %g-multiple" % (img_size_test, grid_size)
    grid_min, grid_max = img_size_test // grid_size, img_size_test // grid_size
    if multi_scale:
        img_size_min = opt.img_size // 1.5
        img_size_max = opt.img_size // 0.667

        # round the given max/min input sizes down to integer multiples of 32
        grid_min, grid_max = img_size_min // grid_size, img_size_max // grid_size
        img_size_min, img_size_max = int(grid_min * grid_size), int(grid_max * grid_size)
        img_size_train = img_size_max  # initialize with max size
        print("Using multi_scale training, image range[{}, {}]".format(
            img_size_min, img_size_max))

    # configure run
    # init_seeds()  # seed the RNGs for reproducibility
    data_dict = parse_data_cfg(data)
    train_path = data_dict["train"]
    test_path = data_dict["valid"]
    num_cls = 1 if opt.single_cls else int(
        data_dict["classes"])  # number of classes
    hyper[
        "cls"] *= num_cls / 80  # update coco-tuned hyp['cls'] to current dataset
    hyper["obj"] *= img_size_test / 320

    # Remove previous results
    for file in glob.glob(results_file):
        os.remove(file)

    # Initialize model
    # model = YOLOV3_SPP(cfg).to(device)
    model = YOLO_SPP(num_cls).to(device)

    # whether to freeze weights and train only the predictor heads
    if isinstance(model, YOLOV3_SPP):
        weights = './weights/yolov3-spp-ultralytics-512.pt'
    else:
        weights = './weights/yolov3spp.pt'
    # NOTE(review): `and False` makes this whole branch dead code; only the
    # else-branch below ever runs — confirm this is intentional.
    if isinstance(model, YOLOV3_SPP) and False:
        if opt.freeze_layers:
            # index-1 is the predictor's index; the YOLOLayer itself is not
            # the predictor
            output_layer_indices = [
                idx - 1 for idx, module in enumerate(model.module_list)
                if isinstance(module, YOLOBlk)
            ]
            # freeze every layer except the predictors and YOLOLayers
            freeze_layer_indices = [
                x for x in range(len(model.module_list))
                if (x not in output_layer_indices) and (
                    x - 1 not in output_layer_indices)
            ]
            # Freeze non-output layers
            # trains 3x2 = 6 parameters in total
            for idx in freeze_layer_indices:
                for parameter in model.module_list[idx].parameters():
                    parameter.requires_grad_(False)
        else:
            # when freeze_layers is False, by default only the layers after
            # darknet53 are trained; delete this code to train all weights
            darknet_end_layer = 74  # only yolov3spp cfg
            # Freeze darknet53 layers
            # trains 21x3 + 3x2 = 69 parameters in total
            for idx in range(darknet_end_layer + 1):  # [0, 74]
                for parameter in model.module_list[idx].parameters():
                    parameter.requires_grad_(False)
    else:
        if opt.freeze_layers:
            model.freeze_layers(model.index_anchors)

    # optimizer: only parameters that still require grad after freezing
    params_grad = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(params_grad,
                          lr=hyper["lr0"],
                          momentum=hyper["momentum"],
                          weight_decay=hyper["weight_decay"],
                          nesterov=True)

    start_epoch = 0
    if weights.endswith(".pt") or weights.endswith(".pth"):
        # resume model/optimizer state from the checkpoint
        epochs, start_epoch = loadCKPT(model, optimizer, epochs, weights,
                                       results_file, device, True)

    train_loader = None
    bool_trainer = True
    num_workers = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
                       8])  # number of workers
    # dataset
    if bool_trainer:
        # train-set image size is the largest size in the multi-scale range
        train_loader = dataLoader(train_path,
                                  img_size_train,
                                  batch_size,
                                  True,
                                  hyper,
                                  opt.rect,
                                  cache_images=opt.cache_images,
                                  single_cls=opt.single_cls,
                                  num_workers=num_workers,
                                  pin_memory=True)
    # validation-set image size is img_size (512)
    test_loader = dataLoader(test_path,
                             img_size_test,
                             1,
                             True,
                             hyper,
                             cache_images=opt.cache_images,
                             single_cls=opt.single_cls,
                             num_workers=num_workers,
                             pin_memory=True)

    # Model parameters
    loss_cfg = {
        'num_cls': num_cls,  # attach number of classes to model
        'hyp': hyper,  # attach hyper parameters to model
        'ratio': 1.0,  # giou loss ratio (obj_loss = 1.0 or giou)
        'anchors': model.anchor_vec,  # anchors
    }

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # cosine decay from 1 down to hyper['lrf']
    lr_lambda = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (
        1 - hyper["lrf"]) + hyper["lrf"]  # cosine
    multi_gpu = type(model) in (nn.parallel.DataParallel,
                                nn.parallel.DistributedDataParallel)
    trainer = Trainer(model,
                      optimizer,
                      loss=YoloLoss(multi_gpu=multi_gpu, cfg=loss_cfg),
                      lr_lambda=lr_lambda,
                      last_epoch=start_epoch)
    if bool_trainer:
        print("starting training for %g epochs..." % epochs)
        print('Using %g data loader workers' % num_workers)
        trainer.fit_generate(train_loader,
                             epochs=epochs,
                             test_loader=test_loader,
                             print_freq=50,
                             save_best=True,
                             multi_scale=multi_scale,
                             img_size=img_size_train,
                             grid_min=grid_min,
                             grid_max=grid_max,
                             grid_size=grid_size,
                             device=device,
                             warmup=True)
    else:
        trainer.evaluate(test_loader, device=device)
    pass
def train(data_cfg='cfg/face.data', accumulate=1):
    """Train a YOLOv3 face model driven by a *.data key=value config file.

    Args:
        data_cfg: path to the training config file; supplies gpus, dataset
            paths, class count, batch/image sizes and LR settings (strings).
        accumulate: number of batches whose gradients are accumulated before
            each optimizer step.
    """
    # Configure run: parse the *.data file into a dict of strings.
    get_data_cfg = parse_data_cfg(data_cfg)
    gpus = get_data_cfg['gpus']
    num_workers = int(get_data_cfg['num_workers'])
    cfg_model = get_data_cfg['cfg_model']
    train_path = get_data_cfg['train']
    num_classes = int(get_data_cfg['classes'])
    finetune_model = get_data_cfg['finetune_model']
    batch_size = int(get_data_cfg['batch_size'])
    img_size = int(get_data_cfg['img_size'])
    multi_scale = get_data_cfg['multi_scale']
    epochs = int(get_data_cfg['epochs'])
    lr_step = str(get_data_cfg['lr_step'])
    lr0 = float(get_data_cfg['lr0'])

    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    device = select_device()

    # Config values are plain strings, so 'True' must be mapped to a bool.
    multi_scale = (multi_scale == 'True')

    print('data_cfg : ', data_cfg)
    print('voc.data config len : ', len(get_data_cfg))
    print('GPUs : ', gpus)
    print('num_workers : ', num_workers)
    print('model : ', cfg_model)
    print('Finetune_model : ', finetune_model)
    print('train_path : ', train_path)
    print('num_classes : ', num_classes)
    print('batch_size : ', batch_size)
    print('img_size : ', img_size)
    print('multi_scale : ', multi_scale)
    print('lr_step : ', lr_step)
    print('lr0 : ', lr0)

    # Rescale the canonical 416-px COCO anchors to the configured input size.
    a_scalse = 416. / img_size
    anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119),
               (116, 90), (156, 198), (373, 326)]
    anchors_new = [(int(w / a_scalse), int(h / a_scalse))
                   for (w, h) in anchors]

    model = Yolov3(num_classes, anchors=anchors_new)

    weights = './weights'  # checkpoint directory
    # mkdir save model document
    if not os.path.exists(weights):
        os.mkdir(weights)
    model = model.to(device)
    # FIX: the original built these with `weights + 'latest_...'`, which is
    # missing a path separator and wrote './weightslatest_<size>.pt' beside
    # the directory instead of inside it.
    latest = os.path.join(weights, 'latest_{}.pt'.format(img_size))
    best = os.path.join(weights, 'best_{}.pt'.format(img_size))

    # Optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=0.9,
                                weight_decay=0.0005)

    start_epoch = 0
    if os.access(finetune_model, os.F_OK):  # load retrain/finetune_model
        print('loading yolo-v3 finetune_model ~~~~~~', finetune_model)
        not_load_filters = 3 * (80 + 5)  # voc: 3*(20+5), coco: 3*(80+5)=255
        chkpt = torch.load(finetune_model, map_location=device)
        # Skip the detection-head filters whose channel count depends on the
        # (possibly different) class count of the pretrained model.
        model.load_state_dict(
            {k: v for k, v in chkpt['model'].items()
             if v.numel() > 1 and v.shape[0] != not_load_filters},
            strict=False)
        # model.load_state_dict(chkpt['model'])
        if 'coco' not in finetune_model:
            start_epoch = chkpt['epoch']
            if chkpt['optimizer'] is not None:
                optimizer.load_state_dict(chkpt['optimizer'])
                # NOTE(review): kept for parity with the original, but this
                # value is overwritten by float('inf') below — confirm intent.
                best_loss = chkpt['best_loss']

    milestones = [int(i) for i in lr_step.split(",")]
    print('milestones : ', milestones)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     milestones=milestones,
                                                     gamma=0.1,
                                                     last_epoch=start_epoch - 1)

    # Dataset
    print('multi_scale : ', multi_scale)
    dataset = LoadImagesAndLabels(train_path,
                                  batch_size=batch_size,
                                  img_size=img_size,
                                  augment=True,
                                  multi_scale=multi_scale)
    print('--------------->>> imge num ---------->>>: ', dataset.__len__())
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=True,
                            pin_memory=False,
                            drop_last=False,
                            collate_fn=dataset.collate_fn)

    t = time.time()
    nB = len(dataloader)
    n_burnin = min(round(nB / 5 + 1), 1000)  # burn-in batches
    best_loss = float('inf')
    test_loss = float('inf')

    flag_start = False
    for epoch in range(0, epochs):
        model.train()

        # Step the LR scheduler once per epoch, but not before the first one.
        if flag_start:
            scheduler.step()
        flag_start = True

        mloss = defaultdict(float)  # mean loss
        for i, (imgs, targets, img_path_, _) in enumerate(dataloader):
            multi_size = imgs.size()
            imgs = imgs.to(device)
            targets = targets.to(device)
            nt = len(targets)
            if nt == 0:  # if no targets continue
                continue

            # SGD burn-in: quartic LR ramp-up over the first batches.
            if epoch == 0 and i <= n_burnin:
                lr = lr0 * (i / n_burnin) ** 4
                for x in optimizer.param_groups:
                    x['lr'] = lr

            # Run model
            pred = model(imgs)

            # Build targets
            target_list = build_targets(model, targets)

            # Compute loss
            loss, loss_dict = compute_loss(pred, target_list)

            # Compute gradient
            loss.backward()

            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nB:
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics
            for key, val in loss_dict.items():
                mloss[key] = (mloss[key] * i + val) / (i + 1)

            print(
                'Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, loss: xy {:.3f}, '
                'wh {:.3f}, '
                'conf {:.3f}, cls {:.3f}, total {:.3f}, time {:.3f}s'.format(
                    epoch, epochs - 1, i, nB - 1, multi_size[2],
                    multi_size[3], nt, scheduler.get_lr()[0], mloss['xy'],
                    mloss['wh'], mloss['conf'], mloss['cls'], mloss['total'],
                    time.time() - t), end='\r')
            t = time.time()
        print()

        # Create checkpoint
        chkpt = {'epoch': epoch,
                 'best_loss': best_loss,
                 'model': model.module.state_dict()
                 if type(model) is nn.parallel.DistributedDataParallel
                 else model.state_dict(),
                 'optimizer': optimizer.state_dict()}

        # Save latest checkpoint
        torch.save(chkpt, latest)

        # Save best checkpoint
        if best_loss == test_loss and epoch % 5 == 0:
            torch.save(chkpt, best)

        # Save backup every 5 epochs (optional)
        if epoch > 0 and epoch % 5 == 0:
            # FIX: use os.path.join here as well (was `weights + '...'`).
            torch.save(chkpt, os.path.join(
                weights, 'yoloV3_{}_epoch_{}.pt'.format(img_size, epoch)))

        # Delete checkpoint
        del chkpt
def detect(save_txt=False, save_img=False):
    """Run Darknet inference over images, videos or streams (from ``opt``).

    Args:
        save_txt: write one text file of box coordinates per saved image.
        save_img: write annotated images/videos to ``opt.output`` (forced on
            for non-webcam sources).
    """
    img_size = (
        320, 192
    ) if ONNX_EXPORT else opt.img_size  # (320, 192) or (416, 256) or (608, 352) for (height, width)
    out, source, weights, half, view_img = opt.output, opt.source, opt.weights, opt.half, opt.view_img
    # A '0' device, an RTSP/HTTP URL, or a .txt list of streams means webcam mode.
    webcam = source == '0' or source.startswith('rtsp') or source.startswith(
        'http') or source.endswith('.txt')

    # Initialize
    device = torch_utils.select_device(
        device='cpu' if ONNX_EXPORT else opt.device)
    if os.path.exists(out):
        shutil.rmtree(out)  # delete output folder
    os.makedirs(out)  # make new output folder

    # Initialize model
    model = Darknet(opt.cfg, img_size)

    # Load weights
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
    else:  # darknet format
        _ = load_darknet_weights(model, weights)

    # Second-stage classifier (disabled by default)
    classify = False
    if classify:
        modelc = torch_utils.load_classifier(name='resnet101',
                                             n=2)  # initialize
        modelc.load_state_dict(
            torch.load('weights/resnet101.pt',
                       map_location=device)['model'])  # load weights
        modelc.to(device).eval()

    # Fuse Conv2d + BatchNorm2d layers
    # model.fuse()

    # Eval mode
    model.to(device).eval()

    # Export mode
    if ONNX_EXPORT:
        img = torch.zeros((1, 3) + img_size)  # (1, 3, 320, 192)
        torch.onnx.export(model,
                          img,
                          'weights/export.onnx',
                          verbose=False,
                          opset_version=11)

        # Validate exported model
        import onnx
        model = onnx.load('weights/export.onnx')  # Load the ONNX model
        onnx.checker.check_model(model)  # Check that the IR is well formed
        print(onnx.helper.printable_graph(
            model.graph))  # Print a human readable representation of the graph
        return

    # Half precision
    half = half and device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Set Dataloader
    vid_path, vid_writer = None, None
    if webcam:
        view_img = True
        torch.backends.cudnn.benchmark = True  # set True to speed up constant image size inference
        dataset = LoadStreams(source, img_size=img_size, half=half)
    else:
        save_img = True
        dataset = LoadImages(source, img_size=img_size, half=half)

    # Get classes and colors
    classes = load_classes(parse_data_cfg(opt.data)['names'])
    colors = [[random.randint(0, 255) for _ in range(3)]
              for _ in range(len(classes))]

    # Run inference
    t0 = time.time()
    forward_time_total = 0
    for path, img, im0s, vid_cap in dataset:
        t = time.time()

        # Get detections
        img = torch.from_numpy(img).to(device)
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        forward_time = time.time()
        pred = model(img)[0]

        # NOTE(review): tests opt.half rather than the local `half` computed
        # above (which also requires CUDA) — confirm they always agree.
        if opt.half:
            pred = pred.float()

        # Apply NMS
        pred = non_max_suppression(pred, opt.conf_thres, opt.nms_thres)

        # Apply second-stage classifier
        if classify:
            pred = apply_classifier(pred, modelc, img, im0s)

        # Process detections
        for i, det in enumerate(pred):  # detections per image
            if webcam:  # batch_size >= 1
                p, s, im0 = path[i], '{}: '.format(i), im0s[i]
            else:
                p, s, im0 = path, '', im0s

            save_path = str(Path(out) / Path(p).name)
            s += '{}x{} '.format(*img.shape[2:])  # print string
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          im0.shape).round()

                # Print results
                for c in det[:, -1].unique():
                    n = (det[:, -1] == c).sum()  # detections per class
                    s += '{} {}s, '.format(n, classes[int(c)])  # add to string

                # Write results
                for *xyxy, conf, _, cls in det:
                    if save_txt:  # Write to file
                        with open(save_path + '.txt', 'a') as file:
                            file.write(
                                ('{} ' * 6 + '\n').format(*xyxy, cls, conf))

                    if save_img or view_img:  # Add bbox to image
                        label = '{} {:.2f}'.format(classes[int(cls)], conf)
                        plot_one_box(xyxy,
                                     im0,
                                     label=label,
                                     color=colors[int(cls)])

            end_time = time.time()
            forward_time_total += end_time - forward_time
            print('{}Done. (net: {:.3f}s, total: {:.3f}s)'.format(
                s,
                end_time - forward_time,
                end_time - t,
            ))

            # Stream results
            if view_img:
                cv2.imshow(p, im0)

            # Save results (image with detections)
            if save_img:
                if dataset.mode == 'images':
                    cv2.imwrite(save_path, im0)
                else:
                    if vid_path != save_path:  # new video
                        vid_path = save_path
                        if isinstance(vid_writer, cv2.VideoWriter):
                            vid_writer.release(
                            )  # release previous video writer

                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        vid_writer = cv2.VideoWriter(
                            save_path, cv2.VideoWriter_fourcc(*opt.fourcc),
                            fps, (w, h))
                    vid_writer.write(im0)

    if save_txt or save_img:
        print('Results saved to {}'.format(os.getcwd()) + os.sep + out)
        if platform == 'darwin':  # MacOS
            # NOTE(review): builds a shell command from paths; spaces in
            # `out`/`save_path` would break it — confirm acceptable here.
            os.system('open ' + out + ' ' + save_path)

    print('Done. (total: {:.3f}s, net avg: {:.3f}ms)'.format(
        time.time() - t0,
        forward_time_total / len(dataset) * 1000,
    ))
def detect(
        model_path,
        root_path,
        cfg,
        data_cfg,
        img_size=416,
        conf_thres=0.5,
        nms_thres=0.5,
):
    """Run YOLOv3 detection on every image in ``root_path`` and show each
    annotated result in a window (ESC stops the loop).

    Returns False when the weight file does not exist.
    """
    classes = load_classes(parse_data_cfg(data_cfg)['names'])
    num_classes = len(classes)

    # Pick the network variant from the cfg name; both load the same weights.
    if "-tiny" in cfg:
        model = Yolov3Tiny(num_classes)
    else:
        model = Yolov3(num_classes)
    weights = model_path
    show_model_param(model)  # print model parameters

    device = select_device()  # choose the inference device
    use_cuda = torch.cuda.is_available()

    # Load weights; bail out early when the checkpoint is missing.
    if not os.access(weights, os.F_OK):
        print('error model not exists')
        return False
    state_dict = torch.load(weights, map_location=device)['model']
    model.load_state_dict(state_dict)

    model.to(device).eval()  # inference mode

    # One BGR color per class, spread across the palette.
    colors = [(v // 32 * 64 + 64, (v // 8) % 4 * 64, v % 8 * 32)
              for v in range(1, num_classes + 1)][::-1]

    for img_name in os.listdir(root_path):
        img_path = root_path + img_name
        im0 = cv2.imread(img_path)
        print("---------------------")

        t_start = time.time()
        img = process_data(im0, img_size)
        if use_cuda:
            torch.cuda.synchronize()  # make the wall-clock timings meaningful
        t_pre = time.time()
        print("process time:", t_pre - t_start)

        img = torch.from_numpy(img).unsqueeze(0).to(device)
        pred, _ = model(img)  # forward pass
        if use_cuda:
            torch.cuda.synchronize()
        t_inf = time.time()
        print("inference time:", t_inf - t_pre)

        detections = non_max_suppression(pred, conf_thres, nms_thres)[0]
        if use_cuda:
            torch.cuda.synchronize()
        t_nms = time.time()
        print("get res time:", t_nms - t_inf)

        if detections is None or len(detections) == 0:
            continue

        # Rescale boxes from the network input size back to the image size.
        detections[:, :4] = scale_coords(img_size, detections[:, :4],
                                         im0.shape).round()

        # (class name, confidence, [x1, y1, x2, y2]) per detection.
        result = [(classes[int(row[-1])], float(row[4]),
                   [int(row[0]), int(row[1]), int(row[2]), int(row[3])])
                  for row in detections]
        if use_cuda:
            torch.cuda.synchronize()
        t_done = time.time()
        print("detect time:", t_done - t_start)
        print(result)

        # Draw every box with its class label and confidence.
        for *box_xyxy, conf, _cls_conf, cls_id in detections:
            label = '%s %.2f' % (classes[int(cls_id)], conf)
            plot_one_box(box_xyxy, im0, label=label, color=colors[int(cls_id)])

        cv2.namedWindow('result', 0)
        cv2.imshow("result", im0)
        key = cv2.waitKey(0)
        if key == 27:  # ESC
            break
def detect(ModelPath, cfg, data_cfg, ImgSize=416, ConfThres=0.5,
           NMSThres=0.5, VideoPath=0):
    """Run YOLOv3 detection on a video stream, display it and record an mp4.

    Args:
        ModelPath: path to the .pt checkpoint.
        cfg: model cfg name (unused here; kept for API parity).
        data_cfg: *.data file naming the class list.
        ImgSize: network input size; the 416-px anchors are rescaled to it.
        ConfThres: confidence threshold for NMS.
        NMSThres: IoU threshold for NMS.
        VideoPath: cv2.VideoCapture source (0 = default camera, or a path).

    Returns False when the weight file is missing; otherwise runs until the
    stream ends or ESC is pressed.
    """
    classes = load_classes(parse_data_cfg(data_cfg)['names'])
    num_classes = len(classes)

    # Initialize the model with anchors rescaled from 416 px to ImgSize.
    weights = ModelPath
    A_Scalse = 416. / ImgSize
    anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119),
               (116, 90), (156, 198), (373, 326)]
    anchors_new = [(int(anchors[j][0] / A_Scalse),
                    int(anchors[j][1] / A_Scalse))
                   for j in range(len(anchors))]

    model = Yolov3(num_classes, anchors=anchors_new)
    device = select_device()  # choose the inference device
    use_cuda = torch.cuda.is_available()

    # Load weights
    if os.access(weights, os.F_OK):  # check the weight file exists
        model.load_state_dict(
            torch.load(weights, map_location=device)['model'])
    else:
        print('error model not exists')
        return False
    model.to(device).eval()  # inference mode

    video_capture = cv2.VideoCapture(VideoPath)
    video_writer = None
    loc_time = time.localtime()
    str_time = time.strftime("%Y-%m-%d_%H-%M-%S", loc_time)
    save_video_path = "./demo/demo_{}.mp4".format(str_time)
    # FIX: ensure the output directory exists — cv2.VideoWriter silently
    # fails to create the file when it does not.
    os.makedirs("./demo", exist_ok=True)
    # -------------------------------------------------
    while True:
        ret, im0 = video_capture.read()
        if ret:
            t = time.time()
            # im0 = cv2.imread("picture/1.png")
            img = process_data(im0, ImgSize)
            if use_cuda:
                torch.cuda.synchronize()  # make the timings meaningful
            t1 = time.time()
            # print("process time:", t1 - t)
            img = torch.from_numpy(img).unsqueeze(0).to(device)

            pred, _ = model(img)  # forward pass
            if use_cuda:
                torch.cuda.synchronize()
            t2 = time.time()
            # print("inference time:", t2 - t1)
            detections = non_max_suppression(pred, ConfThres,
                                             NMSThres)[0]  # nms
            if use_cuda:
                torch.cuda.synchronize()
            t3 = time.time()
            # print("get res time:", t3 - t2)
            if detections is None or len(detections) == 0:
                # No detections: still show the raw frame and poll for ESC.
                cv2.namedWindow('image', 0)
                cv2.imshow("image", im0)
                key = cv2.waitKey(1)
                if key == 27:
                    break
                continue
            # Rescale boxes from 416 to true image size
            detections[:, :4] = scale_coords(ImgSize, detections[:, :4],
                                             im0.shape).round()
            result = []
            for res in detections:
                result.append(
                    (classes[int(res[-1])], float(res[4]),
                     [int(res[0]), int(res[1]), int(res[2]), int(res[3])]))
            if use_cuda:
                torch.cuda.synchronize()

            for r in result:
                print(r)

            # Draw boxes: class 0 gets a distinct color from the rest.
            for *xyxy, conf, cls_conf, cls in detections:
                label = '%s %.2f' % (classes[int(cls)], conf)
                xyxy = int(xyxy[0]), int(xyxy[1]) + 6, int(xyxy[2]), int(
                    xyxy[3])
                if int(cls) == 0:
                    plot_one_box(xyxy,
                                 im0,
                                 label=label,
                                 color=(255, 255, 95),
                                 line_thickness=3)
                else:
                    plot_one_box(xyxy,
                                 im0,
                                 label=label,
                                 color=(15, 155, 255),
                                 line_thickness=3)

            s2 = time.time()
            # print("detect time: {} \n".format(s2 - t))
            # FPS overlay: white outline under a black foreground.
            str_fps = ("{:.2f} FPS".format(1. / (s2 - t + 0.00001)))
            cv2.putText(im0, str_fps, (5, im0.shape[0] - 3),
                        cv2.FONT_HERSHEY_DUPLEX, 0.9, (255, 255, 255), 4)
            cv2.putText(im0, str_fps, (5, im0.shape[0] - 3),
                        cv2.FONT_HERSHEY_DUPLEX, 0.9, (0, 0, 0), 1)

            cv2.namedWindow('image', 0)
            cv2.imshow("image", im0)
            key = cv2.waitKey(1)

            # Lazily create the writer from the first annotated frame's size.
            if video_writer is None:
                fourcc = cv2.VideoWriter_fourcc(*"mp4v")
                video_writer = cv2.VideoWriter(
                    save_video_path,
                    fourcc,
                    fps=25,
                    frameSize=(im0.shape[1], im0.shape[0]))
            video_writer.write(im0)

            if key == 27:
                break
        else:
            break
    cv2.destroyAllWindows()
    # FIX: release the capture, and guard the writer — it remains None when
    # no frame with detections was ever written, and the original crashed
    # here with AttributeError.
    video_capture.release()
    if video_writer is not None:
        video_writer.release()
def detect():
    """Batch-predict boxes for every image listed in ``opt.src_txt_path``,
    draw them, and optionally save/show the annotated images."""
    # 0. Initialize parameters
    cfg = opt.cfg
    weights = opt.weights
    src_txt_path = opt.src_txt_path
    img_size = opt.img_size
    batch_size = opt.batch_size
    dst_path = opt.dst_path
    if not os.path.exists(dst_path):
        os.mkdir(dst_path)
    device = select_device(opt.device)
    classes = load_classes(parse_data_cfg(opt.data)['names'])

    # 1. Load the network
    model = Darknet(cfg)
    if weights.endswith('.pt'):  # TODO: support darknet .weights format
        model.load_state_dict(
            torch.load(weights)['model'])  # TODO: map_location=device ?
    model.to(device).eval()

    # 2. Load the dataset
    test_dataset = VocDataset(src_txt_path, img_size, with_label=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn)  # TODO

    # 3. Predict (forward pass)
    start = time.time()
    pbar = tqdm(dataloader)
    for i, (img_tensor, img0, img_name) in enumerate(pbar):
        pbar.set_description("Already Processed %d image: " % (i + 1))
        # print('clw: Already Processed %d image' % (i+1))
        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)
        output = model(img_tensor)[
            0]  # (x1, y1, x2, y2, obj_conf, class_conf, class_pred)

        # NMS
        nms_output = non_max_suppression(output, opt.conf_thres,
                                         opt.nms_thres)

        # Visualization
        for batch_idx, det in enumerate(nms_output):  # detections per image
            if det is not None:  # and len(det):  # clw note: important !
                # for box in det:
                for *box, conf, _, cls in det:  # det: tensor.Size (bs, 7)  box: list
                    orig_h, orig_w = img0[batch_idx].shape[:2]
                    # Coordinate transform: map boxes drawn on the resized
                    # image back to the original image (new -> orig).
                    new_h = new_w = img_tensor.size()[2]
                    ratio_h = orig_h / new_h
                    ratio_w = orig_w / new_w
                    x1 = int(ratio_w * box[0])
                    y1 = int(ratio_h * box[1])
                    x2 = int(ratio_w * (box[2]))
                    y2 = int(ratio_h * (box[3]))
                    label = '%s %.2f' % (classes[int(cls)], conf)

                    # Visualize the prediction
                    plot_one_box([x1, y1, x2, y2],
                                 img0[batch_idx],
                                 label=label,
                                 color=(255, 0, 0))
                    # cv2.rectangle(img0[batch_idx], (x1, y1), (x2, y2), (0, 0, 255), 1)
                    # if "TypeError: an integer is required (got type tuple)"
                    # is raised, check whether img_tensor was passed by mistake
                if SAVE:
                    # save the annotated result
                    cv2.imwrite(os.path.join(dst_path, img_name[batch_idx]),
                                img0[batch_idx])
                if SHOW:
                    cv2.imshow('aaa', img0[batch_idx])
                    cv2.waitKey(0)
    print('time use: %.3fs' % (time.time() - start))
def test(cfg,
         data_cfg,
         weights=None,
         batch_size=16,
         img_size=416,
         iou_thres=0.5,
         conf_thres=0.001,
         nms_thres=0.5,
         save_json=False,
         model=None):
    """Evaluate a Darknet model on the validation split.

    When ``model`` is None a fresh model is built from ``cfg`` and loaded from
    ``weights``; otherwise the given (already-loaded) model is evaluated.

    Returns:
        ((mp, mr, map, mf1, mean_loss), maps) — overall precision/recall/
        mAP/F1 plus mean loss, and a per-class AP array of length nc.
    """
    if model is None:
        device = torch_utils.select_device()

        # Initialize model
        model = Darknet(cfg).to(device)

        # Load weights
        if weights.endswith('.pt'):  # pytorch format
            model.load_state_dict(
                torch.load(weights, map_location=device)['model'])
        else:  # darknet format
            _ = load_darknet_weights(model, weights)

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device  # get model device
        n_gpu = torch.cuda.device_count()

    # Configure run
    data_cfg = parse_data_cfg(data_cfg)
    nc = int(data_cfg['classes'])  # number of classes
    test_path = data_cfg['valid_path']  # path to test images
    test_set = data_cfg['valid_set']
    names = load_classes(data_cfg['names'])  # class names

    # Dataset
    dataset = YoloDataSets(data_path=test_path,
                           input_size=img_size,
                           batch_size=batch_size,
                           image_set=test_set,
                           augment=False)
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=4,
                            pin_memory=True,
                            collate_fn=dataset.collate_fn)

    seen = 0
    model.eval()
    #coco91class = coco80_to_coco91_class()
    print(('%20s' + '%10s' * 6) %
          ('Class', 'Images', 'Targets', 'P', 'R', 'mAP', 'F1'))
    loss, p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0., 0.
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (imgs, targets) in enumerate(
            tqdm(dataloader, desc='Computing mAP')):
        imgs = imgs.to(device)
        targets = targets.to(device)
        _, _, height, width = imgs.shape

        # Run model
        inf_out, loss_i, loss_item = model(
            imgs, targets)  # inference and training outputs
        loss += torch.mean(loss_i)

        # Run NMS
        output = non_max_suppression(inf_out,
                                     conf_thres=conf_thres,
                                     nms_thres=nms_thres)

        # Statistics per image
        # remove the targets that fills 0 for data distribution.
        true_targets = targets[torch.sum(targets[:, 1:6], 1) != 0]
        for si, pred in enumerate(output):
            labels = true_targets[true_targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 4].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                # No predictions: count the image's targets as misses.
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 4]

                # target boxes (normalized xywh -> absolute xyxy)
                tbox = xywh2xyxy(labels[:, 0:4])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and m[
                            bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    # NOTE(review): if `stats` is empty (no images produced predictions or
    # targets) the next line raises IndexError — confirm that cannot happen.
    nt = np.bincount(stats[3].astype(np.int64),
                     minlength=nc)  # number of targets per class
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1), end='\n\n')

    # Print results per class
    if nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Return results
    maps = np.zeros(nc)
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1, loss / len(dataloader)), maps
def test(
        cfg,
        data,
        weights=None,
        batch_size=16,
        img_size=416,
        iou_thres=0.5,
        conf_thres=0.001,
        nms_thres=0.5,
        save_json=False,
        model=None,
):
    """Evaluate a Darknet model on the validation split of ``data``.

    When ``model`` is None a fresh model is built from ``cfg`` and loaded
    from ``weights``; otherwise the given model is evaluated (quiet mode).
    With ``save_json`` the predictions are also scored against COCO2014
    ground truth via pycocotools.

    Returns:
        ((mp, mr, map, mf1, giou_loss, obj_loss, cls_loss), maps) — overall
        metrics plus per-component mean losses, and a per-class AP array.
    """
    # Initialize/load model and set device
    if model is None:
        device = torch_utils.select_device(opt.device)
        verbose = True

        # Initialize model
        model = Darknet(cfg, img_size).to(device)

        # Load weights
        attempt_download(weights)
        if weights.endswith('.pt'):  # pytorch format
            model.load_state_dict(
                torch.load(weights, map_location=device)['model'])
        else:  # darknet format
            _ = load_darknet_weights(model, weights)

        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
    else:
        device = next(model.parameters()).device  # get model device
        verbose = False

    # Configure run
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    test_path = data['valid']  # path to test images
    names = load_classes(data['names'])  # class names

    # Dataloader
    dataset = LoadImagesAndLabels(test_path, img_size, batch_size)
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=min([os.cpu_count(), batch_size, 16]),
        pin_memory=True,
        collate_fn=dataset.collate_fn,
    )

    seen = 0
    model.eval()
    coco91class = coco80_to_coco91_class()
    # FIX: the progress-bar header contained the extraction artifact
    # '[email protected]' where the upstream column title is 'mAP@0.5'.
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@0.5', 'F1')
    p, r, f1, mp, mr, map, mf1 = 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (imgs, targets, paths, shapes) in enumerate(
            tqdm(dataloader, desc=s)):
        targets = targets.to(device)
        imgs = imgs.to(device)
        _, _, height, width = imgs.shape  # batch size, channels, height, width

        # Plot images with bounding boxes
        if batch_i == 0 and not os.path.exists('test_batch0.jpg'):
            plot_images(imgs=imgs,
                        targets=targets,
                        paths=paths,
                        fname='test_batch0.jpg')

        # Run model
        inf_out, train_out = model(imgs)  # inference and training outputs

        # Compute loss
        # NOTE(review): `hasattr(model, 'param')` looks like it should test
        # the attribute the loss actually reads (hyper-parameters) — confirm.
        if hasattr(model, 'param'):  # if model has loss hyperparameters
            loss += compute_loss(train_out, targets,
                                 model)[1][:3].cpu()  # GIoU, obj, cls

        # Run NMS
        output = non_max_suppression(inf_out,
                                     conf_thres=conf_thres,
                                     nms_thres=nms_thres)

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                # No predictions: count the image's targets as misses.
                if nl:
                    stats.append(([], torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            # with open('test.txt', 'a') as file:
            #     [file.write('%11.5g' * 7 % tuple(x) + '\n') for x in pred]

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(Path(paths[si]).stem.split('_')[-1])
                box = pred[:, :4].clone()  # xyxy
                scale_coords(imgs[si].shape[1:], box,
                             shapes[si])  # to original shape
                box = xyxy2xywh(box)  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                for di, d in enumerate(pred):
                    jdict.append({
                        'image_id': image_id,
                        'category_id': coco91class[int(d[6])],
                        'bbox': [floatn(x, 3) for x in box[di]],
                        'score': floatn(d[4], 5)
                    })

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = [0] * len(pred)
            if nl:
                detected = []
                tcls_tensor = labels[:, 0]

                # target boxes (normalized xywh -> absolute xyxy)
                tbox = xywh2xyxy(labels[:, 1:5])
                tbox[:, [0, 2]] *= width
                tbox[:, [1, 3]] *= height

                # Search for correct predictions
                for i, (*pbox, pconf, pcls_conf, pcls) in enumerate(pred):

                    # Break if all targets already located in image
                    if len(detected) == nl:
                        break

                    # Continue if predicted class not among image classes
                    if pcls.item() not in tcls:
                        continue

                    # Best iou, index between pred and targets
                    m = (pcls == tcls_tensor).nonzero().view(-1)
                    iou, bi = bbox_iou(pbox, tbox[m]).max(0)

                    # If iou > threshold and class is correct mark as correct
                    if iou > iou_thres and m[
                            bi] not in detected:  # and pcls == tcls[bi]:
                        correct[i] = 1
                        detected.append(m[bi])

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct, pred[:, 4].cpu(), pred[:, 6].cpu(), tcls))

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in list(zip(*stats))]  # to numpy
    if len(stats):
        p, r, ap, f1, ap_class = ap_per_class(*stats)
        mp, mr, map, mf1 = p.mean(), r.mean(), ap.mean(), f1.mean()
        nt = np.bincount(stats[3].astype(np.int64),
                         minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%10.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map, mf1))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap[i], f1[i]))

    # Save JSON
    if save_json and map and len(jdict):
        try:
            imgIds = [
                int(Path(x).stem.split('_')[-1]) for x in dataset.img_files
            ]
            with open('results.json', 'w') as file:
                json.dump(jdict, file)

            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            cocoGt = COCO(
                './data-bin/coco2014/annotations/instances_val2014.json'
            )  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes('results.json')  # initialize COCO pred api

            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # [:32]  # only evaluate these images
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map = cocoEval.stats[1]  # update mAP to pycocotools mAP
        except:
            # NOTE(review): bare except hides all failures, not only the
            # missing-pycocotools case named in the message.
            print(
                'WARNING: missing dependency pycocotools from requirements.txt. Can not compute official COCO mAP.'
            )

    # Return results
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map, mf1, *(loss / len(dataloader)).tolist()), maps
def train():
    """Train a Darknet/YOLO model driven entirely by the global `opt` CLI options.

    Reads module-level globals (assumed defined elsewhere in this file — TODO
    confirm): `opt` (parsed args), `param` (hyperparameter dict), `device`,
    `results_file`, `wdir`, `last`, `best`, `mixed_precision`, `tb_writer`.

    Returns:
        tuple: the final `results` 7-tuple
        (P, R, mAP, F1, val GIoU, val Objectness, val Classification).
    """
    cfg = opt.cfg
    data = opt.data
    img_size = opt.img_size
    epochs = 1 if opt.prebias else opt.epochs  # 500200 batches at bs 64, 117263 images = 273 epochs
    batch_size = opt.batch_size
    accumulate = opt.accumulate  # effective bs = batch_size * accumulate = 16 * 4 = 64
    weights = opt.weights  # initial training weights

    if 'pw' not in opt.arch:  # remove BCELoss positive weights
        param['cls_pw'] = 1.
        param['obj_pw'] = 1.

    # Initialize
    init_seeds()
    multi_scale = opt.multi_scale

    if multi_scale:
        img_sz_min = round(img_size / 32 / 1.5) + 1
        img_sz_max = round(img_size / 32 * 1.5) - 1
        img_size = img_sz_max * 32  # initiate with maximum multi_scale size
        print('Using multi-scale {} - {}'.format(img_sz_min * 32, img_size))

    # Configure run
    data_dict = parse_data_cfg(data)
    train_path = data_dict['train']
    nc = int(data_dict['classes'])  # number of classes

    # Remove previous results
    for f in glob.glob('*_batch*.jpg') + glob.glob(results_file):
        os.remove(f)

    # Initialize model
    model = Darknet(cfg, arch=opt.arch).to(device)

    # Optimizer: conv weights get weight decay (pg1), everything else does not (pg0)
    pg0, pg1 = [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if 'Conv2d.weight' in k:
            pg1 += [v]  # parameter group 1 (apply weight_decay)
        else:
            pg0 += [v]  # parameter group 0

    if opt.adam:
        optimizer = optim.Adam(pg0, lr=param['lr0'])
    else:
        optimizer = optim.SGD(pg0, lr=param['lr0'], momentum=param['momentum'], nesterov=True)
    optimizer.add_param_group({'params': pg1, 'weight_decay': param['weight_decay']})  # add pg1 with weight_decay
    del pg0, pg1

    cutoff = -1  # backbone reaches to cutoff layer
    start_epoch = 0
    best_fitness = float('inf')  # fitness is a summed validation loss: lower is better
    attempt_download(weights)
    if weights.endswith('.pt'):  # pytorch format
        # possible weights are '*.pt', 'yolov3-spp.pt', 'yolov3-tiny.pt' etc.
        chkpt = torch.load(weights, map_location=device)

        # Load model, keeping only tensors whose element count matches ours —
        # tolerates checkpoints trained with a different class count.
        chkpt['model'] = {k: v for k, v in chkpt['model'].items()
                          if model.state_dict()[k].numel() == v.numel()}
        model.load_state_dict(chkpt['model'], strict=False)

        # Load optimizer state (its presence implies best_fitness was saved too)
        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])
            best_fitness = chkpt['best_fitness']

        # Restore previous results log
        if chkpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(chkpt['training_results'])  # write results.txt

        start_epoch = chkpt['epoch'] + 1
        del chkpt

    elif len(weights) > 0:  # darknet format
        # possible weights are '*.weights', 'yolov3-tiny.conv.15', 'darknet53.conv.74' etc.
        cutoff = load_darknet_weights(model, weights)

    if opt.transfer or opt.prebias:  # transfer learning edge (yolo) layers
        nf = int(model.module_defs[model.yolo_layers[0] - 1]['filters'])  # yolo layer size (i.e. 255)

        if opt.prebias:
            for p in optimizer.param_groups:
                # lower param count allows more aggressive training settings:
                # i.e. SGD ~0.1 lr0, ~0.9 momentum
                p['lr'] *= 100  # lr gain
                if p.get('momentum') is not None:  # for SGD but not Adam
                    p['momentum'] *= 0.9

        for p in model.parameters():
            if opt.prebias and p.numel() == nf:  # train (yolo biases)
                p.requires_grad = True
            elif opt.transfer and p.shape[0] == nf:  # train (yolo biases+weights)
                p.requires_grad = True
            else:  # freeze layer
                p.requires_grad = False

    # Scheduler: drop lr 10x at 80% and 90% of the full run
    # https://github.com/ultralytics/yolov3/issues/238
    scheduler = lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[round(opt.epochs * x) for x in [0.8, 0.9]],
        gamma=0.1,
    )
    scheduler.last_epoch = start_epoch - 1

    # Mixed precision training https://github.com/NVIDIA/apex
    if mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

    # Initialize distributed training (single node, loopback init)
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend='nccl',  # 'distributed backend'
                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
                                world_size=1,  # number of nodes for distributed training
                                rank=0)  # distributed training node rank
        model = torch.nn.parallel.DistributedDataParallel(model)
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

    # Dataset
    dataset = LoadImagesAndLabels(
        train_path,
        img_size,
        batch_size,
        augment=True,
        param=param,  # augmentation hyperparameters
        rect=opt.rect,  # rectangular training
        image_weights=opt.img_weights,
        cache_labels=True if epochs > 10 else False,
        cache_images=False if opt.prebias else opt.cache_images,
    )

    # Dataloader
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=min([os.cpu_count(), batch_size, 16]),
        shuffle=not opt.rect,  # Shuffle=True unless rectangular training is used
        pin_memory=True,
        collate_fn=dataset.collate_fn,
    )

    # Start training
    model.nc = nc  # attach number of classes to model
    model.arch = opt.arch  # attach yolo architecture
    model.param = param  # attach hyperparameters to model
    torch_utils.model_info(model, report='summary')  # 'full' or 'summary'
    nb = len(dataloader)
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    t0 = time.time()
    print('Starting {} for {} epochs...'.format('prebias' if opt.prebias else 'training', epochs))
    for epoch in range(start_epoch, epochs):  # epoch ----------------------------------------------------------------
        model.train()
        print(('{:>10s}' * 8).format(
            'Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size',
        ))

        # Freeze backbone at epoch 0, unfreeze at epoch 1 (optional)
        freeze_backbone = False
        if freeze_backbone and epoch < 2:
            for name, p in model.named_parameters():
                if int(name.split('.')[1]) < cutoff:  # if layer < 75
                    p.requires_grad = False if epoch == 0 else True

        # Update image weights (optional)
        if dataset.image_weights:
            w = model.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
            image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
            dataset.indices = random.choices(range(dataset.n), weights=image_weights, k=dataset.n)  # rand weighted idx

        mloss = torch.zeros(4).to(device)  # mean losses
        pbar = tqdm(enumerate(dataloader), total=nb)  # progress bar
        for i, (imgs, targets, paths, _) in pbar:  # batch -----------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device)
            targets = targets.to(device)

            # Multi-Scale training
            if multi_scale:
                if ni / accumulate % 10 == 0:  # adjust (67% - 150%) every 10 batches
                    img_size = random.randrange(img_sz_min, img_sz_max + 1) * 32
                sf = img_size / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / 32.) * 32 for x in imgs.shape[2:]]  # new shape (stretched to 32-multiple)
                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Plot images with bounding boxes (first batch only)
            if ni == 0:
                fname = 'train_batch{}.jpg'.format(i)
                plot_images(imgs=imgs, targets=targets, paths=paths, fname=fname)
                if tb_writer:
                    tb_writer.add_image(fname, cv2.imread(fname)[:, :, ::-1], dataformats='HWC')

            # Run model
            preds = model(imgs)

            # Compute loss
            loss, loss_items = compute_loss(preds, targets, model)
            if not torch.isfinite(loss):
                print('WARNING: non-finite loss, ending training ', loss_items)
                return results

            # Scale loss by nominal batch_size of 64
            loss *= batch_size / 64

            # Compute gradient
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # Accumulate gradient for x batches before optimizing
            if ni % accumulate == 0:
                optimizer.step()
                optimizer.zero_grad()

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0  # (GB)
            s = ('{:>10s}' * 2 + '{:10.3g}' * 6).format(
                '{:g}/{:g}'.format(epoch, epochs - 1),
                '{:.3g}G'.format(mem), *mloss, len(targets), img_size)
            pbar.set_description(s)
            # end batch ------------------------------------------------------------------------------------------------

        # Update scheduler
        scheduler.step()

        # Process epoch results
        final_epoch = epoch + 1 == epochs
        if opt.prebias:
            print_model_biases(model)
        else:
            # Calculate mAP (always test final epoch, skip first 10 if opt.nosave)
            if not (opt.notest or (opt.nosave and epoch < 10)) or final_epoch:
                with torch.no_grad():
                    results, maps = test.test(
                        cfg,
                        data,
                        batch_size=batch_size,
                        img_size=opt.img_size,
                        model=model,
                        conf_thres=0.001 if final_epoch and epoch > 0 else 0.1,  # 0.1 for speed
                        save_json=final_epoch and epoch > 0 and 'coco.data' in data,
                    )

        # Write epoch results.
        # BUGFIX: the original used ('%10.3g' * 7).format(results) — .format() on a
        # %-style template with no '{}' placeholders is a no-op, so the seven metric
        # values were never written. %-interpolation writes them correctly.
        with open(results_file, 'a') as f:
            f.write(s + ('%10.3g' * 7) % results + '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)

        # Write Tensorboard results
        if tb_writer:
            x = list(mloss) + list(results)
            titles = [
                'GIoU', 'Objectness', 'Classification', 'Train loss',
                'Precision', 'Recall', 'mAP', 'F1',
                'val GIoU', 'val Objectness', 'val Classification',
            ]
            for xi, title in zip(x, titles):
                tb_writer.add_scalar(title, xi, epoch)

        # Update best fitness (sum of validation losses; lower is better)
        fitness = sum(results[4:])  # total loss
        if fitness < best_fitness:
            best_fitness = fitness

        # Save training results
        save = (not opt.nosave) or (final_epoch and not opt.evolve) or opt.prebias
        if save:
            with open(results_file, 'r') as f:
                # Create checkpoint
                chkpt = {'epoch': epoch,
                         'best_fitness': best_fitness,
                         'training_results': f.read(),
                         'model': model.module.state_dict() if type(
                             model) is nn.parallel.DistributedDataParallel else model.state_dict(),
                         'optimizer': None if final_epoch else optimizer.state_dict()}

            # Save last checkpoint
            torch.save(chkpt, last)

            # Save best checkpoint
            if best_fitness == fitness:
                torch.save(chkpt, best)

            # Save backup every 10 epochs (optional)
            if epoch > 0 and epoch % 10 == 0:
                torch.save(chkpt, wdir + 'backup{}.pt'.format(epoch))

            # Delete checkpoint
            del chkpt
        # end epoch ----------------------------------------------------------------------------------------------------

    # end training
    if len(opt.name) and not opt.prebias:
        fresults = 'results{}.txt'.format(opt.name)
        flast = 'last{}.pt'.format(opt.name)
        fbest = 'best{}.pt'.format(opt.name)
        os.rename('results.txt', fresults)
        os.rename(wdir + 'last.pt', wdir + flast) if os.path.exists(wdir + 'last.pt') else None
        os.rename(wdir + 'best.pt', wdir + fbest) if os.path.exists(wdir + 'best.pt') else None

        # save to cloud
        if opt.bucket:
            os.system('gsutil cp {} {} gs://{}'.format(fresults, wdir + flast, opt.bucket))

    plot_results()  # save as results.png
    print('{} epochs completed in {:.3f} hours.\n'.format(
        epoch - start_epoch + 1, (time.time() - t0) / 3600),
    )
    dist.destroy_process_group() if torch.cuda.device_count() > 1 else None
    torch.cuda.empty_cache()
    return results
def train(data_cfg='cfg/voc_coco.data', accumulate=1):
    """Train a Yolov3/Yolov3Tiny model from the settings in a .data config file.

    Args:
        data_cfg: path to a key=value config file parsed by parse_data_cfg().
        accumulate: number of batches to accumulate gradients over before each
            optimizer step.
    """
    device = select_device()
    # Configure run
    get_data_cfg = parse_data_cfg(data_cfg)  # training configuration parameters as a dict
    gpus = get_data_cfg['gpus']
    num_workers = int(get_data_cfg['num_workers'])
    cfg_model = get_data_cfg['cfg_model']
    train_path = get_data_cfg['train']
    valid_ptah = get_data_cfg['valid']  # NOTE(review): 'ptah' looks like a typo for 'path'; value is only printed below
    num_classes = int(get_data_cfg['classes'])
    finetune_model = get_data_cfg['finetune_model']
    batch_size = int(get_data_cfg['batch_size'])
    img_size = int(get_data_cfg['img_size'])
    multi_scale = get_data_cfg['multi_scale']
    epochs = int(get_data_cfg['epochs'])
    lr_step = str(get_data_cfg['lr_step'])

    # config values arrive as raw strings; convert the multi_scale flag to bool
    if multi_scale == 'True':
        multi_scale = True
    else:
        multi_scale = False

    print('data_cfg : ', data_cfg)
    print('voc.data config len : ', len(get_data_cfg))
    print('gpus : ', gpus)
    print('num_workers : ', num_workers)
    print('model : ', cfg_model)
    print('finetune_model : ', finetune_model)
    print('train_path : ', train_path)
    print('valid_ptah : ', valid_ptah)
    print('num_classes : ', num_classes)
    print('batch_size : ', batch_size)
    print('img_size : ', img_size)
    print('multi_scale : ', multi_scale)
    print('lr_step : ', lr_step)

    # load model: pick tiny vs full yolov3 from the cfg file name
    if "-tiny" in cfg_model:
        model = Yolov3Tiny(num_classes)
        weights = './weights-yolov3-tiny/'
    else:
        model = Yolov3(num_classes)
        weights = './weights-yolov3/'

    # mkdir save model document
    if not os.path.exists(weights):
        os.mkdir(weights)

    model = model.to(device)
    latest = weights + 'latest.pt'  # rolling checkpoint path
    best = weights + 'best.pt'  # best-loss checkpoint path

    # Optimizer
    lr0 = 0.001  # initial learning rate
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=lr0,
                                momentum=0.9,
                                weight_decay=0.0005)

    start_epoch = 0

    if os.access(finetune_model, os.F_OK):  # load retrain/finetune_model
        print('loading yolo-v3 finetune_model ~~~~~~', finetune_model)
        not_load_filters = 3 * (80 + 5)  # voc: 3*(20+5), coco: 3*(80+5)=255
        chkpt = torch.load(finetune_model, map_location=device)
        # skip scalar tensors and the class-count-dependent output filters so a
        # checkpoint trained on a different class count can still be loaded
        model.load_state_dict(
            {
                k: v
                for k, v in chkpt['model'].items()
                if v.numel() > 1 and v.shape[0] != not_load_filters
            },
            strict=False)
        start_epoch = chkpt['epoch']
        if chkpt['optimizer'] is not None:
            optimizer.load_state_dict(chkpt['optimizer'])
            # NOTE(review): this loaded best_loss is overwritten by float('inf')
            # just before the epoch loop below — the restored value is discarded.
            best_loss = chkpt['best_loss']

    # Set scheduler (reduce lr at epochs 218, 245, i.e. batches 400k, 450k);
    # gamma is the multiplicative lr decay factor
    milestones = [int(i) for i in lr_step.split(",")]
    print('milestones : ', milestones)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[int(i) for i in lr_step.split(",")],
        gamma=0.1,
        last_epoch=start_epoch - 1)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend=opt.backend,
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.rank)
        model = torch.nn.parallel.DistributedDataParallel(model)

    # Dataset
    print('multi_scale : ', multi_scale)
    dataset = LoadImagesAndLabels(train_path,
                                  batch_size=batch_size,
                                  img_size=img_size,
                                  augment=True,
                                  multi_scale=multi_scale)

    # Dataloader
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=True,
                            pin_memory=False,
                            drop_last=False,
                            collate_fn=dataset.collate_fn)

    # Start training
    t = time.time()
    model_info(model)
    nB = len(dataloader)
    n_burnin = min(round(nB / 5 + 1), 1000)  # burn-in batches
    best_loss = float('inf')
    test_loss = float('inf')
    for epoch in range(start_epoch, epochs):
        print()
        model.train()

        # Update scheduler (stepped once at the start of each epoch)
        scheduler.step()

        mloss = defaultdict(float)  # mean loss
        for i, (imgs, targets, img_path_, _) in enumerate(dataloader):
            multi_size = imgs.size()
            imgs = imgs.to(device)
            targets = targets.to(device)
            nt = len(targets)
            if nt == 0:  # if no targets continue
                continue

            # SGD burn-in: quartic lr ramp over the first n_burnin batches of epoch 0
            if epoch == 0 and i <= n_burnin:
                lr = lr0 * (i / n_burnin)**4
                for x in optimizer.param_groups:
                    x['lr'] = lr

            # Run model
            pred = model(imgs)

            # Build targets
            target_list = build_targets(model, targets)

            # Compute loss
            loss, loss_dict = compute_loss(pred, target_list)

            # Compute gradient
            loss.backward()

            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nB:
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics
            for key, val in loss_dict.items():
                mloss[key] = (mloss[key] * i + val) / (i + 1)

            print(
                'Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, loss: xy {:.2f}, wh {:.2f}, '
                'conf {:.2f}, cls {:.2f}, total {:.2f}, time {:.3f}s'.format(
                    epoch, epochs - 1, i, nB - 1, multi_size[2],
                    multi_size[3], nt,
                    scheduler.get_lr()[0], mloss['xy'], mloss['wh'],
                    mloss['conf'], mloss['cls'], mloss['total'],
                    time.time() - t))

            # NOTE(review): `s` is built every batch but never used afterwards here
            s = ('%8s%12s' + '%10.3g' * 7) % (
                '%g/%g' % (epoch, epochs - 1), '%g/%g' % (i, nB - 1),
                mloss['xy'], mloss['wh'], mloss['conf'], mloss['cls'],
                mloss['total'], nt, time.time() - t)

            t = time.time()

        if epoch % 10 == 0:
            # Calculate mAP
            print('\n')
            with torch.no_grad():
                print("-------" * 5 + "testing" + "-------" * 5)
                results = test.test(cfg_model,
                                    data_cfg,
                                    batch_size=batch_size,
                                    img_size=img_size,
                                    model=model)

            # Update best loss
            test_loss = results[4]
            if test_loss < best_loss:
                best_loss = test_loss

        if True:
            # Create checkpoint
            chkpt = {
                'epoch': epoch,
                'best_loss': best_loss,
                'model': model.module.state_dict() if type(model) is nn.parallel.DistributedDataParallel else model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            # Save latest checkpoint
            torch.save(chkpt, latest)

            # Save best checkpoint
            if best_loss == test_loss and epoch % 5 == 0:
                torch.save(chkpt, best)

            # Save backup every 10 epochs (optional)
            if epoch > 0 and epoch % 5 == 0:
                torch.save(chkpt, weights + 'backup%g.pt' % epoch)

            # Delete checkpoint
            del chkpt
def test(cfg,
         data,
         batch_size,
         img_size,
         conf_thres,
         iou_thres,
         nms_thres,
         src_txt_path='./valid.txt',
         dst_path='./output',
         weights=None,
         model=None,
         log_file_path='log.txt'):
    """Run detection over a validation set and write per-class detection files.

    Either pass an already-built `model`, or leave it None and supply `weights`
    to load a Darknet model from `cfg`. Returns the value of calc_APs().

    NOTE(review): `classes_pred` and `result_path` are referenced below but not
    defined in this function — presumably module-level globals; verify, else
    this raises NameError on the first detection.
    """
    # 0. Initialize some parameters
    if not os.path.exists(dst_path):
        os.mkdir(dst_path)
    data = parse_data_cfg(data)
    nc = int(data['classes'])  # number of classes
    class_names = load_classes(data['names'])

    # 1. Load network
    if model is None:
        device = select_device(opt.device)
        model = Darknet(cfg)
        if weights.endswith('.pt'):  # TODO: support .weights (darknet) format
            model.load_state_dict(
                torch.load(weights)['model'])  # TODO: map_location=device ?
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)  # clw note: multi-GPU
    else:
        device = next(model.parameters()).device  # get model device
    model.to(device).eval()

    # 2. Load dataset
    test_dataset = VocDataset(src_txt_path,
                              img_size,
                              with_label=True,
                              is_training=False)
    dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=8,  # TODO
        collate_fn=test_dataset.test_collate_fn,  # TODO
        pin_memory=True)

    # 3. Predict (forward pass)
    s = ('%20s' + '%10s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R',
                                 'mAP@{}'.format(iou_thres), 'F1')
    pbar = tqdm(dataloader)
    for i, (img_tensor, _, img_path, shapes) in enumerate(pbar):
        start = time.time()
        img_tensor = img_tensor.to(device)  # (bs, 3, 416, 416)

        # Disable gradients
        with torch.no_grad():
            # (1) Run model
            output = model(img_tensor)  # [0]
            # (2) NMS
            nms_output = non_max_suppression(output, conf_thres, nms_thres)  # list (64,)
        s = 'time use per batch: %.3fs' % (time.time() - start)
        pbar.set_description(s)

        for batch_idx, pred in enumerate(
                nms_output
        ):  # pred: (bs, 7) -> xyxy, obj_conf*class_conf, class_conf, cls_idx
            ################################################
            if pred is None:
                continue

            # keep xyxy + combined confidence + class index
            bboxes_prd = torch.cat((pred[:, 0:5], pred[:, 6].unsqueeze(1)),
                                   dim=1).cpu().numpy()

            ###### clw note: coord transform to origin size(because of resize and so on....) is really important !!!
            scale_coords(img_tensor[batch_idx].shape[1:], bboxes_prd,
                         shapes[batch_idx][0],
                         shapes[batch_idx][1])  # to original shape
            ######

            for bbox in bboxes_prd:
                coor = np.array(bbox[:4], dtype=np.int32)
                score = bbox[4]
                class_ind = int(bbox[5])
                class_name = class_names[class_ind]
                classes_pred.add(class_name)  # NOTE(review): not defined locally — confirm global
                score = '%.4f' % score
                xmin, ymin, xmax, ymax = map(str, coor)
                # one detection line in comp4 (PASCAL VOC submission) layout
                s = ' '.join([
                    str(img_path[batch_idx]),
                    str(score), xmin, ymin, xmax, ymax
                ]) + '\n'
                with open(
                        os.path.join(result_path,  # NOTE(review): not defined locally — confirm global
                                     'comp4_det_test_' + class_name + '.txt'),
                        'a') as f:
                    f.write(s)
            ################################################
    return calc_APs()
def train(data_cfg='cfg/voc.data', accumulate=1):
    """Train a Yolov3/Yolov3Tiny model with anchors rescaled to the configured
    input size, using settings from a .data config file.

    Args:
        data_cfg: path to a key=value config file parsed by parse_data_cfg().
        accumulate: number of batches to accumulate gradients over before each
            optimizer step.
    """
    device = select_device()

    # Config
    get_data_cfg = parse_data_cfg(data_cfg)  # training configuration parameters as a dict
    gpus = get_data_cfg['gpus']
    num_workers = int(get_data_cfg['num_workers'])
    cfg_model = get_data_cfg['cfg_model']
    train_path = get_data_cfg['train']
    valid_ptah = get_data_cfg['valid']  # NOTE(review): 'ptah' looks like a typo for 'path'; value is only printed below
    num_classes = int(get_data_cfg['classes'])
    finetune_model = get_data_cfg['finetune_model']
    batch_size = int(get_data_cfg['batch_size'])
    img_size = int(get_data_cfg['img_size'])
    multi_scale = get_data_cfg['multi_scale']
    epochs = int(get_data_cfg['epochs'])
    lr_step = str(get_data_cfg['lr_step'])

    # config values arrive as raw strings; convert the multi_scale flag to bool
    if multi_scale == 'True':
        multi_scale = True
    else:
        multi_scale = False

    print('data_cfg : ', data_cfg)
    print('voc.data config len : ', len(get_data_cfg))
    print('gpus : ', gpus)
    print('num_workers : ', num_workers)
    print('model : ', cfg_model)
    print('finetune_model : ', finetune_model)
    print('train_path : ', train_path)
    print('valid_ptah : ', valid_ptah)
    print('num_classes : ', num_classes)
    print('batch_size : ', batch_size)
    print('img_size : ', img_size)
    print('multi_scale : ', multi_scale)
    print('lr_step : ', lr_step)

    # load model: anchors are defined for 416x416 input, rescale them to img_size
    if "tiny" in cfg_model:
        a_scalse = 416. / img_size
        anchors = [(10, 14), (23, 27), (37, 58), (81, 82), (135, 169),
                   (344, 319)]
        anchors_new = [(int(anchors[j][0] / a_scalse),
                        int(anchors[j][1] / a_scalse))
                       for j in range(len(anchors))]
        print('old anchors : ', anchors)
        model = Yolov3Tiny(num_classes, anchors=anchors_new)
        weights = './weights-yolov3-tiny/'
    else:
        a_scalse = 416. / img_size
        anchors = [(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), (59, 119),
                   (116, 90), (156, 198), (373, 326)]
        anchors_new = [(int(anchors[j][0] / a_scalse),
                        int(anchors[j][1] / a_scalse))
                       for j in range(len(anchors))]
        model = Yolov3(num_classes, anchors=anchors_new)
        weights = './weights-yolov3/'

    # make dir save model document
    if not os.path.exists(weights):
        os.mkdir(weights)

    latest = weights + 'latest.pt'  # rolling checkpoint path
    best = weights + 'best.pt'  # best-loss checkpoint path

    # Optimizer
    lr0 = 0.001  # initial learning rate
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=lr0,
                                momentum=0.9,
                                weight_decay=0.0005)

    start_epoch = 0
    model = model.to(device)
    print(finetune_model)
    if os.access(finetune_model, os.F_OK):  # load finetune weights if the file exists
        print(
            '\n/************************** load_model *************************/'
        )
        print(finetune_model)
        load_model(model, torch.load(finetune_model))
    else:
        print('finetune_model not exist !')

    # gamma is the multiplicative lr decay factor at each milestone epoch
    milestones = [int(i) for i in lr_step.split(",")]
    print('milestones : ', milestones)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[int(i) for i in lr_step.split(",")],
        gamma=0.1,
        last_epoch=start_epoch - 1)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend=opt.backend,
                                init_method=opt.dist_url,
                                world_size=opt.world_size,
                                rank=opt.rank)
        model = torch.nn.parallel.DistributedDataParallel(model)

    # Dataset
    print('multi_scale : ', multi_scale)
    dataset = LoadImagesAndLabels(train_path,
                                  batch_size=batch_size,
                                  img_size=img_size,
                                  augment=True,
                                  multi_scale=multi_scale)
    print('--------------->>> imge num : ', dataset.__len__())

    # Dataloader
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            shuffle=True,
                            pin_memory=False,
                            drop_last=False,
                            collate_fn=dataset.collate_fn)

    # Start training
    t = time.time()
    model_info(model)
    nB = len(dataloader)
    n_burnin = min(round(nB / 5 + 1), 1000)  # burn-in batches
    best_loss = float('inf')
    test_loss = float('inf')
    for epoch in range(start_epoch, epochs):
        print('')
        model.train()

        # Update scheduler (stepped once at the start of each epoch)
        scheduler.step()

        mloss = defaultdict(float)  # mean loss
        for i, (imgs, targets, img_path_, _) in enumerate(dataloader):
            multi_size = imgs.size()
            imgs = imgs.to(device)
            targets = targets.to(device)
            nt = len(targets)
            if nt == 0:  # if no targets continue
                continue

            # SGD burn-in: quartic lr ramp over the first n_burnin batches of epoch 0
            if epoch == 0 and i <= n_burnin:
                lr = lr0 * (i / n_burnin)**4
                for x in optimizer.param_groups:
                    x['lr'] = lr

            # Run model
            pred = model(imgs)

            # Build targets
            target_list = build_targets(model, targets)

            # Compute loss
            loss, loss_dict = compute_loss(pred, target_list)

            # Compute gradient
            loss.backward()

            # Accumulate gradient for x batches before optimizing
            if (i + 1) % accumulate == 0 or (i + 1) == nB:
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics
            for key, val in loss_dict.items():
                mloss[key] = (mloss[key] * i + val) / (i + 1)

            print(
                'Epoch {:3d}/{:3d}, Batch {:6d}/{:6d}, Img_size {}x{}, nTargets {}, lr {:.6f}, loss: xy {:.2f}, wh {:.2f}, '
                'conf {:.2f}, cls {:.2f}, total {:.2f}, time {:.3f}s'.format(
                    epoch, epochs - 1, i, nB - 1, multi_size[2],
                    multi_size[3], nt,
                    scheduler.get_lr()[0], mloss['xy'], mloss['wh'],
                    mloss['conf'], mloss['cls'], mloss['total'],
                    time.time() - t), end='\r')

            # NOTE(review): `s` is built every batch but never used afterwards here
            s = ('%8s%12s' + '%10.3g' * 7) % (
                '%g/%g' % (epoch, epochs - 1), '%g/%g' % (i, nB - 1),
                mloss['xy'], mloss['wh'], mloss['conf'], mloss['cls'],
                mloss['total'], nt, time.time() - t)

            t = time.time()

        if epoch % 5 == 0 and epoch > 0:
            # Calculate mAP
            print('\n')
            with torch.no_grad():
                print("-------" * 5 + "testing" + "-------" * 5)
                results = test.test(cfg_model,
                                    data_cfg,
                                    batch_size=batch_size,
                                    img_size=img_size,
                                    model=model)

            # Update best loss
            test_loss = results[4]
            if test_loss < best_loss:
                best_loss = test_loss

        if True:
            # Create checkpoint
            chkpt = {
                'epoch': epoch,
                'best_loss': best_loss,
                'model': model.module.state_dict() if type(model) is nn.parallel.DistributedDataParallel else model.state_dict(),
                'optimizer': optimizer.state_dict()
            }

            # Save latest checkpoint
            torch.save(chkpt, latest)

            # Save best checkpoint
            if best_loss == test_loss and epoch % 5 == 0:
                torch.save(chkpt, best)

            # Save backup every 10 epochs (optional)
            if epoch > 0 and epoch % 5 == 0:
                torch.save(chkpt, weights + 'Detect%g.pt' % epoch)

            # Delete checkpoint
            del chkpt
def train():
    """Train a Darknet/YOLO model end-to-end.

    All configuration is read from module-level state: the `opt` namespace
    (cfg path, weights path, img_size, batch_size, epochs, data file) and
    module-level hyper-parameters/handles (`lr0`, `momentum`, `weight_decay`,
    `mixed_precision`, `device`, `log_file_path`, `last_model_path`).

    Side effects: writes log files via `write_to_file`, TensorBoard events,
    a `train_batch.jpg` preview image, and a checkpoint to `last_model_path`
    after every epoch.
    """
    # 0. Initialize parameters (random seed, cfg info, dataset paths).
    cfg = opt.cfg
    weights = opt.weights
    img_size = opt.img_size
    batch_size = opt.batch_size
    total_epochs = opt.epochs
    init_seeds()
    data = parse_data_cfg(opt.data)
    train_txt_path = data['train']
    valid_txt_path = data['valid']
    nc = int(data['classes'])  # number of object classes

    # Print configuration info (also written to the log below).
    print('config file:', cfg)
    print('pretrained weights:', weights)

    # 1. Build the model and load pretrained weights (three supported formats).
    model = Darknet(cfg).to(device)
    if weights.endswith('.pt'):  # PyTorch checkpoint
        ### model.load_state_dict(torch.load(weights)['model'])
        # NOTE: a plain load_state_dict fails when the checkpoint was trained
        # with a different class count — the conv layer feeding each YOLO
        # layer has a mismatched shape (e.g. size mismatch for
        # module_list.81.Conv2d.weight: checkpoint [255, 1024, 1, 1] vs
        # current model [75, 1024, 1, 1]) — so tensors whose element count
        # differs are filtered out before loading.
        # TODO: confirm map_location=device is the desired behavior here.
        chkpt = torch.load(weights, map_location=device)
        try:
            chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
            model.load_state_dict(chkpt['model'], strict=False)
            # model.load_state_dict(chkpt['model'])
        except KeyError as e:
            s = "%s is not compatible with %s" % (opt.weights, opt.cfg)
            raise KeyError(s) from e
        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' + repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)
    elif weights.endswith('.pth'):
        # e.g. a torchvision classification backbone such as
        # 'https://download.pytorch.org/models/resnet50-19c8e357.pth'.
        model_state_dict = model.state_dict()
        chkpt = torch.load(weights, map_location=device)
        #try:
        # Remap checkpoint keys onto this model's layout.  The loop assumes
        # checkpoint keys repeat in groups of 5 (presumably conv.weight plus
        # four batch-norm tensors, with the BN ordering differing between the
        # two layouts — TODO confirm against the actual key lists); the last
        # `fc_item_num` checkpoint entries (presumably the classifier fc
        # weight/bias, absent from the detector) are skipped.
        state_dict = {}
        block_cnt = 0
        fc_item_num = 2
        chkpt_keys = list(chkpt.keys())
        model_keys = list(model.state_dict().keys())
        model_values = list(model.state_dict().values())
        for i in range(len(chkpt_keys) - fc_item_num):  # 102 - 2
            if i % 5 == 0:
                state_dict[model_keys[i+block_cnt]] = chkpt[chkpt_keys[i]]
            elif i % 5 == 1 or i % 5 == 2:
                state_dict[model_keys[i+block_cnt+2]] = chkpt[chkpt_keys[i]]
            elif i % 5 == 3 or i % 5 == 4:
                state_dict[model_keys[i+block_cnt-2]] = chkpt[chkpt_keys[i]]
            if i % 5 == 4:
                # End of a 5-key group: skip one extra model key and keep the
                # model's own value for it (presumably BN num_batches_tracked,
                # which the .pth checkpoint does not carry — TODO confirm).
                block_cnt += 1
                state_dict[model_keys[i + block_cnt]] = model_values[i + block_cnt]
        #chkpt['model'] = {k: v for k, v in chkpt['model'].items() if model.state_dict()[k].numel() == v.numel()}
        model.load_state_dict(state_dict, strict=False)
        # model.load_state_dict(chkpt['model'])
        # except KeyError as e:
        #     s = "%s is not compatible with %s" % (opt.weights, opt.cfg)
        #     raise KeyError(s) from e
        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' + repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)
    elif len(weights) > 0:  # darknet format
        # possible weights are '*.weights', 'yolov3-tiny.conv.15', 'darknet53.conv.74' etc.
        load_darknet_weights(model, weights)
        write_to_file(repr(opt), log_file_path, mode='w')
        write_to_file('anchors:\n' + repr(model.module_defs[model.yolo_layers[0]]['anchors']), log_file_path)
    # else:
    #     raise Exception("pretrained model's path can't be NULL!")

    # 2. Optimizer and learning-rate schedule.
    start_epoch = 0
    #optimizer = torch.optim.SGD(model.parameters(), lr=lr0, momentum=momentum, weight_decay=weight_decay, nesterov=True)
    # TODO: nesterov ? weight_decay=0.0005 ?

    # Optimizer: three parameter groups so weight decay is applied only to
    # conv weights (pg1), not to biases (pg2) or remaining params (pg0).
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if '.bias' in k:
            pg2 += [v]  # biases
        elif 'Conv2d.weight' in k:
            pg1 += [v]  # apply weight_decay
        else:
            pg0 += [v]  # parameter group 0
    optimizer = torch.optim.SGD(pg0, lr=lr0, momentum=momentum, nesterov=True)
    optimizer.add_param_group({'params': pg1, 'weight_decay': weight_decay})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    del pg0, pg1, pg2

    ###### apex need ######
    if mixed_precision:
        model, optimizer = amp.initialize(model, optimizer, opt_level='O1', verbosity=0)

    # Initialize distributed training
    if torch.cuda.device_count() > 1:
        dist.init_process_group(backend='nccl',  # 'distributed backend'
                                init_method='tcp://127.0.0.1:9999',  # distributed training init method
                                world_size=1,  # number of nodes for distributed training
                                rank=0)  # distributed training node rank
        # NOTE: with multiple GPUs, DistributedDataParallel must be created
        # AFTER amp.initialize(), otherwise it raises an error.
        model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)
        model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level
    ######
    model.nc = nc

    # Step learning-rate schedule: decay by 10x at 80% and 90% of training.
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[round(total_epochs * x) for x in [0.8, 0.9]], gamma=0.1)
    # Cosine learning-rate schedule (alternative):
    #lf = lambda x: (1 + math.cos(x * math.pi / total_epochs)) / 2
    #scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    # 3. Dataset and dataloader.
    train_dataset = VocDataset(train_txt_path, img_size, with_label=True)
    dataloader = DataLoader(train_dataset,
                            batch_size=batch_size,
                            shuffle=True,  # TODO: True
                            num_workers=8,  # TODO
                            collate_fn=train_dataset.train_collate_fn,
                            pin_memory=True)

    # 4. Training loop.
    print('')  # blank line before the epoch banner
    print('Starting training for %g epochs...' % total_epochs)
    nb = len(dataloader)  # number of batches per epoch
    mloss = torch.zeros(4).to(device)  # mean losses: GIoU, obj, cls, total (matches the printed columns)
    writer = SummaryWriter()  # tensorboard --logdir=runs, view at http://localhost:6006/
    prebias = start_epoch == 0
    for epoch in range(start_epoch, total_epochs):  # epoch ------------------------------
        # Re-enter train mode here because test.test() at the end of each
        # epoch calls model.eval().
        model.train()

        # # Prebias
        # if prebias:
        #     if epoch < 3:  # prebias
        #         ps = 0.1, 0.9  # prebias settings (lr=0.1, momentum=0.9)
        #     else:  # normal training
        #         ps = lr0, momentum  # normal training settings
        #         print_model_biases(model)
        #         prebias = False
        #
        #     # Bias optimizer settings
        #     optimizer.param_groups[2]['lr'] = ps[0]
        #     if optimizer.param_groups[2].get('momentum') is not None:  # for SGD but not Adam
        #         optimizer.param_groups[2]['momentum'] = ps[1]

        start = time.time()
        title = ('\n' + '%10s' * 11 ) % ('Epoch', 'Batch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size', 'lr', 'time_use')
        print(title)
        #pbar = tqdm(dataloader, ncols=20)  # tune ncols so the bar fits on one line without wrapping
        #for i, (img_tensor, target_tensor, img_path, _) in enumerate(pbar):

        # # Freeze darknet53.conv.74 for first epoch
        # freeze_backbone = False
        # if freeze_backbone and (epoch < 3):
        #     for i, (name, p) in enumerate(model.named_parameters()):
        #         if int(name.split('.')[2]) < 75:  # if layer < 75  # multi-GPU uses [2], single-GPU [1]
        #             p.requires_grad = False if (epoch < 3) else True

        for i, (img_tensor, target_tensor, img_path, _) in enumerate(dataloader):
            # # SGD burn-in
            # ni = epoch * nb + i
            # if ni <= 1000:  # n_burnin = 1000
            #     lr = lr0 * (ni / 1000) ** 2
            #     for g in optimizer.param_groups:
            #         g['lr'] = lr
            batch_start = time.time()
            #print(img_path)
            img_tensor = img_tensor.to(device)
            target_tensor = target_tensor.to(device)

            ### One training step: forward, loss, backward, parameter update.
            # (1) forward
            #print('img_tensor:', img_tensor[0][1][208][208])
            pred = model(img_tensor)

            # (2) compute loss
            loss, loss_items = compute_loss(pred, target_tensor, model)

            if not torch.isfinite(loss):
                raise Exception('WARNING: non-finite loss, ending training ', loss_items)

            # (3) backward — route through apex's loss scaler when mixed
            #     precision is enabled.
            if mixed_precision:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            # (4) optimizer: update parameters, zero gradients
            # ni = i + nb * epoch  # number integrated batches (since train start)
            # if ni % accumulate == 0:  # Accumulate gradient for x batches before optimizing
            optimizer.step()
            optimizer.zero_grad()

            # Print batch results
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0  # (GB)
            s = ('%10s' * 3 + '%10.3g' * 7 + '%10.3gs') % ('%g/%g' % (epoch, total_epochs - 1), '%g/%g' % (i, nb - 1), '%.3gG' % mem, *mloss, len(target_tensor), img_size, scheduler.get_lr()[0], time.time()-batch_start)
            if i % 10 == 0:
                print(s)

            # Plot the first batch of the first epoch as a visual sanity check.
            if epoch == start_epoch and i == 0:
                fname = 'train_batch.jpg'  # filename
                cur_path = os.getcwd()
                res = plot_images(images=img_tensor, targets=target_tensor, paths=img_path, fname=os.path.join(cur_path, fname))
                writer.add_image(fname, res, dataformats='HWC', global_step=epoch)
                # tb_writer.add_graph(model, imgs)  # add model to tensorboard
            # end batch ------------------------------------------------------------------------------------------------

        print('time use per epoch: %.3fs' % (time.time() - start))
        write_to_file(title, log_file_path)
        write_to_file(s, log_file_path)

        # Update scheduler
        scheduler.step()

        # compute mAP on the validation set
        results, maps = test.test(cfg,
                                  'cfg/voc.data',
                                  batch_size=batch_size,
                                  img_size=img_size,
                                  conf_thres=0.05,
                                  iou_thres=0.5,
                                  nms_thres=0.5,
                                  src_txt_path=valid_txt_path,
                                  dst_path='./output',
                                  weights=None,
                                  model=model,
                                  log_file_path=log_file_path)

        # Tensorboard: first three mean losses (mloss[:-1] drops 'total') plus
        # the test metrics, in tag order.
        tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/F1']
        for x, tag in zip(list(mloss[:-1]) + list(results), tags):
            writer.add_scalar(tag, x, epoch)

        # Save checkpoint — unwrap .module when the model is wrapped in
        # DistributedDataParallel (multi-GPU).
        chkpt = {'epoch': epoch,
                 'model': model.module.state_dict() if type(model) is nn.parallel.DistributedDataParallel else model.state_dict(),
                 'optimizer': optimizer.state_dict()}
        torch.save(chkpt, last_model_path)
    print('end')