def test_emb(
        cfg,
        data_cfg,
        weights,
        batch_size=16,
        iou_thres=0.5,
        conf_thres=0.3,
        nms_thres=0.45,
        print_interval=40,
):
    """Evaluate the quality of the re-ID embeddings produced by the model.

    Extracts an embedding for every labelled pedestrian box in the
    'test_emb' split, computes all pairwise cosine similarities, and
    reports TPR at several fixed FAR levels via an ROC curve.

    Args:
        cfg: path to the darknet-style model .cfg file.
        data_cfg: path to a JSON file with 'test_emb' and 'root' keys.
        weights: checkpoint path; '.pt' loads a PyTorch state dict,
            anything else is treated as darknet binary weights.
        batch_size / print_interval: dataloader batch size and logging cadence.
        iou_thres / conf_thres / nms_thres: unused here; kept for signature
            parity with the detection test() function.

    Returns:
        List of interpolated TPR values (one per FAR level), or None if no
        embeddings were collected.
    """
    # Configure run: read dataset paths and input resolution from configs.
    f = open(data_cfg)
    data_cfg_dict = json.load(f)
    f.close()
    test_paths = data_cfg_dict['test_emb']
    dataset_root = data_cfg_dict['root']
    cfg_dict = parse_model_cfg(cfg)
    # [width, height] expected by the network.
    img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]

    # Initialize model in embedding-extraction mode (test_emb=True makes the
    # forward pass return per-box features + identity label).
    model = Darknet(cfg_dict, test_emb=True)

    # Load weights.
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights,
                                         map_location='cpu')['model'],
                              strict=False)
    else:  # darknet format
        load_darknet_weights(model, weights)

    #model = torch.nn.DataParallel(model)
    # NOTE(review): DistributedDataParallel requires a prior
    # torch.distributed.init_process_group() call — presumably done by the
    # caller/launcher; confirm before running standalone.
    model = torch.nn.parallel.DistributedDataParallel(
        model, find_unused_parameters=True)
    model.cuda().eval()

    # Get dataloader (no augmentation, fixed order for evaluation).
    transforms = T.Compose([T.ToTensor()])
    dataset = JointDataset(dataset_root,
                           test_paths,
                           img_size,
                           augment=False,
                           transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=8,
                                             drop_last=False,
                                             collate_fn=collate_fn)
    embedding, id_labels = [], []
    print('Extracting pedestrain features...')
    for batch_i, (imgs, targets, paths, shapes,
                  targets_len) in enumerate(dataloader):
        t = time.time()
        # Each row of output is [feature..., identity_label].
        output = model(imgs.cuda(), targets.cuda(), targets_len.cuda()).squeeze()

        for out in output:
            feat, label = out[:-1], out[-1].long()
            # label == -1 marks boxes without an identity annotation; skip them.
            if label != -1:
                embedding.append(feat)
                id_labels.append(label)

        if batch_i % print_interval == 0:
            print(
                'Extracting {}/{}, # of instances {}, time {:.2f} sec.'.format(
                    batch_i, len(dataloader), len(id_labels),
                    time.time() - t))

    print('Computing pairwise similairity...')
    if len(embedding) < 1:
        # Nothing was extracted; evaluation is impossible.
        return None
    embedding = torch.stack(embedding, dim=0).cuda()
    id_labels = torch.LongTensor(id_labels)
    n = len(id_labels)
    print(n, len(embedding))
    assert len(embedding) == n

    # Cosine similarity matrix: L2-normalize then take the Gram matrix.
    embedding = F.normalize(embedding, dim=1)
    pdist = torch.mm(embedding, embedding.t()).cpu().numpy()
    # gt[i, j] is True iff samples i and j share the same identity.
    gt = id_labels.expand(n, n).eq(id_labels.expand(n, n).t()).numpy()

    # Keep only the strict upper triangle (each unordered pair once).
    # NOTE(review): pairs whose similarity is exactly 0 would be dropped by
    # this != 0 trick — unlikely with float similarities, but not impossible.
    up_triangle = np.where(np.triu(pdist) - np.eye(n) * pdist != 0)
    pdist = pdist[up_triangle]
    gt = gt[up_triangle]

    # Interpolate TPR at fixed false-accept rates from the ROC curve
    # (roc_curve returns fpr, tpr, thresholds — named far/tar here).
    far_levels = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
    far, tar, threshold = metrics.roc_curve(gt, pdist)
    interp = interpolate.interp1d(far, tar)
    tar_at_far = [interp(x) for x in far_levels]
    # NOTE(review): loop variable `f` shadows the (already closed) file
    # handle above — harmless here but worth renaming.
    for f, fa in enumerate(far_levels):
        print('TPR@FAR={:.7f}: {:.4f}'.format(fa, tar_at_far[f]))
    return tar_at_far
def train(
        cfg,
        data_cfg,
        resume=False,
        epochs=100,
        batch_size=16,
        accumulated_batches=1,
        freeze_backbone=False,
        opt=None,
):
    """Train the JDE Darknet model, with optional network-slimming sparsity.

    This variant fine-tunes from a fixed pretrained checkpoint when
    resume=True (two-group LR: backbone at 0.01*lr, classifier at lr) and
    supports BN-scale sparsity training ("network slimming") when opt.sr
    is set: after each epoch it reports the BN-weight sparsity level and
    saves darknet-format weights.

    Args:
        cfg: path to the darknet model .cfg file.
        data_cfg: JSON file with 'train' and 'root' keys.
        resume: load a finetune checkpoint instead of a backbone.
        epochs / batch_size / accumulated_batches: usual training knobs;
            gradients are accumulated for `accumulated_batches` steps.
        freeze_backbone: freeze layers below `cutoff` for the first epoch.
        opt: argparse namespace (lr, epochs, sr, s, unfreeze_bn,
            print_interval, test_interval expected).
    """
    weights = '../weights'  # moved one level up so the folder is easy to copy
    mkdir_if_missing(weights)
    # Path used to resume the last saved checkpoint — careful not to overwrite it!
    latest = osp.join(weights, 'latest.pt')

    torch.backends.cudnn.benchmark = True  # unsuitable for multiscale

    # Configure run: read training list and input size.
    print("loading data")
    sys.stdout.flush()
    f = open(data_cfg)
    data_config = json.load(f)
    trainset_paths = data_config['train']
    dataset_root = data_config['root']
    f.close()
    cfg_dict = parse_model_cfg(cfg)
    img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]

    # Get dataloader (augmented, shuffled; drop_last keeps batch shape fixed).
    transforms = T.Compose([T.ToTensor()])
    dataset = JointDataset(dataset_root,
                           trainset_paths,
                           img_size,
                           augment=True,
                           transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=8,
                                             pin_memory=False,
                                             drop_last=True,
                                             collate_fn=collate_fn)

    # Initialize model (nID = number of distinct identities for the re-ID head).
    print("building model")
    sys.stdout.flush()
    model = Darknet(cfg_dict, dataset.nID)

    cutoff = -1  # backbone reaches to cutoff layer
    start_epoch = 0
    if resume:
        # pretrain = "/home/master/kuanzi/weights/jde_1088x608_uncertainty.pt"
        pretrain = "/home/master/kuanzi/weights/jde_864x480_uncertainty.pt"  #576x320
        print("Loading finetune weight...", pretrain)
        sys.stdout.flush()
        checkpoint = torch.load(pretrain, map_location='cpu')
        model_dict = model.state_dict()
        # Drop the fully-connected classifier layer (identity count may differ).
        pretrained_dict = {
            k: v
            for k, v in checkpoint['model'].items()
            if not k.startswith("classifier")
        }
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        model.cuda().train()
        print("model weight loaded")
        sys.stdout.flush()

        # Split parameters into classifier vs. backbone groups by object id
        # so each group can get its own learning rate.
        classifer_param_value = list(map(id, model.classifier.parameters()))
        classifer_param = model.classifier.parameters()
        base_params = filter(lambda p: id(p) not in classifer_param_value,
                             model.parameters())
        print("classifer_param\n", classifer_param)  # [2218660649072]
        print("classifer_param_value\n", classifer_param_value)  # [2218660649072]
        print("base_params\n", base_params)  # <filter object at 0x0000020493D95048>
        sys.stdout.flush()

        # optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=opt.lr * 0.1, momentum=.9)
        # Backbone trains at 1% of the classifier learning rate.
        optimizer = torch.optim.SGD(
            [{
                'params': filter(lambda x: x.requires_grad, base_params),
                'lr': opt.lr * 0.01
            }, {
                'params': classifer_param,
                'lr': opt.lr
            }],
            momentum=.9)

        print("chk epoch:\n", checkpoint['epoch'])
        sys.stdout.flush()
        start_epoch = checkpoint['epoch'] + 1

    else:
        # Initialize model with backbone (optional).
        print("Loading backbone...")
        sys.stdout.flush()
        if cfg.endswith('yolov3.cfg'):
            load_darknet_weights(model, osp.join(weights, 'darknet53.conv.74'))
            cutoff = 75
        elif cfg.endswith('yolov3-tiny.cfg'):
            load_darknet_weights(model,
                                 osp.join(weights, 'yolov3-tiny.conv.15'))
            cutoff = 15

        model.cuda().train()

        # Set optimizer (single LR group when training from a backbone).
        optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad,
                                           model.parameters()),
                                    lr=opt.lr,
                                    momentum=.9,
                                    weight_decay=1e-4)

    """ sparsity: network slimming """
    # alpha = opt.alpha  # parser.add_argument("--alpha",type=float,default=1.,help="BN layer scaling factor")
    alpha = 1
    # Scale BN gammas down before sparsity training (inverse applied at save time).
    model = scale_gama(alpha, model, scale_down=True)
    # Record which module indices belong to shortcut layers: their BN layers
    # must be exempt from pruning.
    donntprune = []
    for k, m in enumerate(model.modules()):
        if isinstance(m, shortcutLayer):  # EmptyLayer
        # if isinstance(m, EmptyLayer):  # TODO,
            # NOTE(review): offsets (-8 / -3) map the shortcut's source and
            # preceding BN modules in the flattened module list — presumably
            # tied to this cfg's layer layout; verify if the cfg changes.
            x = k + m.froms - 8
            donntprune.append(x)
            x = k - 3
            donntprune.append(x)
    print(donntprune)

    model = torch.nn.DataParallel(model)
    # Set scheduler: decay LR by 10x at 50% and 75% of the planned epochs.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[int(0.5 * opt.epochs),
                    int(0.75 * opt.epochs)],
        gamma=0.1)

    # An important trick for detection: freeze bn during fine-tuning.
    if not opt.unfreeze_bn:
        for i, (name, p) in enumerate(model.named_parameters()):
            p.requires_grad = False if 'batch_norm' in name else True

    model_info(model)
    t0 = time.time()
    print("begin training...")
    sys.stdout.flush()
    for epoch in range(epochs):
        epoch += start_epoch
        logger.info(('%8s%12s' + '%10s' * 6) %
                    ('Epoch', 'Batch', 'box', 'conf', 'id', 'total',
                     'nTargets', 'time'))

        # Freeze darknet53.conv.74 for first epoch.
        if freeze_backbone and (epoch < 2):
            for i, (name, p) in enumerate(model.named_parameters()):
                if int(name.split('.')[2]) < cutoff:  # if layer < 75
                    p.requires_grad = False if (epoch == 0) else True

        ui = -1
        rloss = defaultdict(float)  # running loss
        optimizer.zero_grad()
        for i, (imgs, targets, _, _, targets_len) in enumerate(dataloader):
            if sum([len(x) for x in targets]) < 1:  # if no targets continue
                continue

            # SGD burn-in: quartic LR warm-up over the first `burnin` batches.
            burnin = min(1000, len(dataloader))
            if (epoch == 0) & (i <= burnin):
                lr = opt.lr * (i / burnin)**4
                for g in optimizer.param_groups:
                    g['lr'] = lr

            # Compute loss, compute gradient, update parameters.
            loss, components = model(imgs.cuda(), targets.cuda(),
                                     targets_len.cuda())
            # components: mean of the 5 tracked loss terms across GPU replicas.
            components = torch.mean(components.view(-1, 5), dim=0)
            loss = torch.mean(loss)
            loss.backward()

            """ TODO, sparsity training """
            if opt.sr:
                # def updateBN(model,s,donntprune) — adds L1 subgradient on BN
                # weights (skipping shortcut layers) to push them toward zero.
                updateBN(model, opt.s, donntprune)

            # Accumulate gradient for x batches before optimizing.
            if ((i + 1) % accumulated_batches == 0) or (
                    i == len(dataloader) - 1):
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics.
            ui += 1
            for ii, key in enumerate(model.module.loss_names):
                rloss[key] = (rloss[key] * ui + components[ii]) / (ui + 1)

            s = ('%8s%12s' + '%10.3g' * 6) % (
                '%g/%g' % (epoch, epochs - 1),
                '%g/%g' % (i, len(dataloader) - 1), rloss['box'],
                rloss['conf'], rloss['id'], rloss['loss'], rloss['nT'],
                time.time() - t0)
            t0 = time.time()
            if i % opt.print_interval == 0:
                logger.info(s)

        # Save latest checkpoint.
        checkpoint = {
            'epoch': epoch,
            'model': model.module.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        torch.save(checkpoint, latest)

        scheduler.step()

        # Periodically save an epoch-tagged checkpoint (mAP testing is decoupled).
        if epoch % opt.test_interval == 0 and epoch != 0:
            epoch_chk = osp.join(weights,
                                 str(epoch) + '_epoch_diou_arcface_sparsity.pt')
            checkpoint = {
                'epoch': epoch,
                'model': model.module.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(checkpoint, epoch_chk)

        """ sparsity training """
        # https://github.com/foolwood/pytorch-slimming/blob/7d13c090720a2e614bf638f0a1e66aa97bffee0b/prune.py#L43
        if opt.sr:
            model.train(False)
            # Count prunable BN channels (excluding shortcut-adjacent BNs).
            total = 0
            for k, m in enumerate(model.modules()):
                if isinstance(m, nn.BatchNorm2d):
                    if k not in donntprune:
                        total += m.weight.data.shape[0]
            # Gather all prunable |BN weight| values into one flat tensor.
            bn = torch.zeros(total)
            index = 0
            for k, m in enumerate(model.modules()):
                if isinstance(m, nn.BatchNorm2d):
                    if k not in donntprune:
                        size = m.weight.data.shape[0]
                        bn[index:(index + size)] = m.weight.data.abs().clone()
                        index += size
            # y, i: all BN weights sorted ascending (y = values, i = indices).
            # NOTE(review): this clobbers the dataloader loop variable `i`;
            # benign here since the inner loop has finished, but worth renaming.
            y, i = torch.sort(bn)
            # Split into 5 quantile groups to make percentages easy to read.
            number = int(len(y) / 5)
            # Report the sparsity level at the 20/40/60/80/100% quantiles.
            print(
                "0~20%%:%f,20~40%%:%f,40~60%%:%f,60~80%%:%f,80~100%%:%f" %
                (y[number], y[2 * number], y[3 * number], y[4 * number],
                 y[-1]))
            model.train()
            # Undo the BN scaling before exporting darknet weights, then re-apply.
            model = scale_gama(alpha, model, scale_down=False)
            if isinstance(model, torch.nn.DataParallel):
                model.module.save_weights("%s/yolov3_sparsity_%d.weights" %
                                          (weights, epoch))
            else:
                model.save_weights("%s/yolov3_sparsity_%d.weights" %
                                   (weights, epoch))
            model = scale_gama(alpha, model, scale_down=True)
            print("save weights in %s/yolov3_sparsity_%d.weights" %
                  (weights, epoch))
def test(
        cfg,
        data_cfg,
        weights,
        batch_size=16,
        iou_thres=0.5,
        conf_thres=0.3,
        nms_thres=0.45,
        print_interval=40,
):
    """Evaluate single-class (pedestrian) detection mAP on the 'test' split.

    Runs the detector over the test set, applies NMS, matches detections to
    ground-truth boxes by IoU, and accumulates per-image precision, recall,
    and AP.

    Args:
        cfg: path to the darknet model .cfg file.
        data_cfg: JSON file with 'test' and 'root' keys.
        weights: '.pt' PyTorch checkpoint or darknet binary weights.
        iou_thres: IoU required for a detection to count as a true positive.
        conf_thres / nms_thres: NMS confidence and overlap thresholds.
        print_interval: batches between progress prints.

    Returns:
        (mean_mAP, mean_R, mean_P) — NOTE(review): these are computed as
        np.sum(list) / (AP_accum_count + 1e-16), i.e. normalized by the
        per-class TP-accumulation count, not by the number of images;
        confirm this is the intended normalization.
    """
    # Configure run.
    f = open(data_cfg)
    data_cfg_dict = json.load(f)
    f.close()
    #nC = int(data_cfg_dict['classes'])  # number of classes (80 for COCO)
    nC = 1  # single class: pedestrian
    test_path = data_cfg_dict['test']
    dataset_root = data_cfg_dict['root']
    cfg_dict = parse_model_cfg(cfg)
    img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]

    # Initialize model in detection mode.
    model = Darknet(cfg_dict, test_emb=False)

    # Load weights.
    if weights.endswith('.pt'):  # pytorch format
        model.load_state_dict(torch.load(weights,
                                         map_location='cpu')['model'],
                              strict=False)
    else:  # darknet format
        load_darknet_weights(model, weights)

    #model = torch.nn.DataParallel(model)
    # NOTE(review): DistributedDataParallel needs init_process_group() to
    # have been called already — presumably by the launcher; confirm.
    model = torch.nn.parallel.DistributedDataParallel(
        model, find_unused_parameters=True)
    model.cuda().eval()

    # Get dataloader (deterministic order, no augmentation).
    transforms = T.Compose([T.ToTensor()])
    dataset = JointDataset(dataset_root,
                           test_path,
                           img_size,
                           augment=False,
                           transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=8,
                                             drop_last=False,
                                             collate_fn=collate_fn)

    mean_mAP, mean_R, mean_P, seen = 0.0, 0.0, 0.0, 0
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))
    outputs, mAPs, mR, mP, TP, confidence, pred_class, target_class, jdict = \
        [], [], [], [], [], [], [], [], []
    AP_accum, AP_accum_count = np.zeros(nC), np.zeros(nC)
    for batch_i, (imgs, targets, paths, shapes,
                  targets_len) in enumerate(dataloader):
        t = time.time()
        output = model(imgs.cuda())
        output = non_max_suppression(output,
                                     conf_thres=conf_thres,
                                     nms_thres=nms_thres)
        # Keep only the first 6 columns per detection (box, conf, cls-conf).
        for i, o in enumerate(output):
            if o is not None:
                output[i] = o[:, :6]

        # Compute average precision for each sample.
        # Trim each padded target tensor to its true length.
        targets = [targets[i][:int(l)] for i, l in enumerate(targets_len)]
        for si, (labels, detections) in enumerate(zip(targets, output)):
            seen += 1

            if detections is None:
                # If there are labels but no detections mark as zero AP.
                if labels.size(0) != 0:
                    mAPs.append(0), mR.append(0), mP.append(0)
                continue

            # Get detections sorted by decreasing confidence scores.
            detections = detections.cpu().numpy()
            detections = detections[np.argsort(-detections[:, 4])]

            # If no labels add number of detections as incorrect.
            correct = []
            if labels.size(0) == 0:
                # correct.extend([0 for _ in range(len(detections))])
                mAPs.append(0), mR.append(0), mP.append(0)
                continue
            else:
                target_cls = labels[:, 0]

                # Extract target boxes as (x1, y1, x2, y2), scaling the
                # normalized coordinates up to pixel units.
                target_boxes = xywh2xyxy(labels[:, 2:6])
                target_boxes[:, 0] *= img_size[0]
                target_boxes[:, 2] *= img_size[0]
                target_boxes[:, 1] *= img_size[1]
                target_boxes[:, 3] *= img_size[1]

                detected = []
                for *pred_bbox, conf, obj_conf in detections:
                    obj_pred = 0  # single-class: predicted class is always 0
                    pred_bbox = torch.FloatTensor(pred_bbox).view(1, -1)
                    # Compute iou with target boxes.
                    iou = bbox_iou(pred_bbox, target_boxes, x1y1x2y2=True)[0]
                    # Extract index of largest overlap.
                    best_i = np.argmax(iou)
                    # If overlap exceeds threshold and classification is
                    # correct (and target not already matched) mark as correct.
                    if iou[best_i] > iou_thres and obj_pred == labels[
                            best_i, 0] and best_i not in detected:
                        correct.append(1)
                        detected.append(best_i)
                    else:
                        correct.append(0)

            # Compute Average Precision (AP) per class.
            AP, AP_class, R, P = ap_per_class(
                tp=correct,
                conf=detections[:, 4],
                pred_cls=np.zeros_like(detections[:, 5]),  # detections[:, 6]
                target_cls=target_cls)

            # Accumulate AP per class.
            AP_accum_count += np.bincount(AP_class, minlength=nC)
            AP_accum += np.bincount(AP_class, minlength=nC, weights=AP)

            # Compute mean AP across all classes in this image, and append to image list.
            mAPs.append(AP.mean())
            mR.append(R.mean())
            mP.append(P.mean())

            # Means of all images (see NOTE(review) in the docstring about
            # the normalization denominator).
            mean_mAP = np.sum(mAPs) / (AP_accum_count + 1E-16)
            mean_R = np.sum(mR) / (AP_accum_count + 1E-16)
            mean_P = np.sum(mP) / (AP_accum_count + 1E-16)

        if batch_i % print_interval == 0:
            # Print image mAP and running mean mAP.
            print(('%11s%11s' + '%11.3g' * 4 + 's') %
                  (seen, dataloader.dataset.nF, mean_P, mean_R, mean_mAP,
                   time.time() - t))
    # Print mAP per class.
    print('%11s' * 5 % ('Image', 'Total', 'P', 'R', 'mAP'))

    print('AP: %-.4f\n\n' % (AP_accum[0] / (AP_accum_count[0] + 1E-16)))

    # Return mAP.
    return mean_mAP, mean_R, mean_P
def train(
        cfg,
        data_cfg,
        resume=False,
        epochs=100,
        batch_size=16,
        accumulated_batches=1,
        freeze_backbone=False,
        opt=None,
):
    """Train the JDE Darknet model, resuming from fixed checkpoints.

    resume=True has two sub-modes selected by opt.latest:
      - opt.latest: reload a full checkpoint (weights + optimizer state)
        and continue from its epoch;
      - otherwise: load pretrained weights minus the classifier layer and
        restart the optimizer with a two-group LR (backbone at 0.01*lr,
        classifier at lr).
    resume=False starts from a darknet backbone.

    Args:
        cfg: path to the darknet model .cfg file.
        data_cfg: JSON file with 'train' and 'root' keys.
        resume / epochs / batch_size / accumulated_batches /
        freeze_backbone: usual training knobs.
        opt: argparse namespace (lr, epochs, latest, unfreeze_bn,
            print_interval, test_interval expected).
    """
    weights = '../weights'  # moved one level up so the folder is easy to copy
    mkdir_if_missing(weights)
    # Path used to resume the last saved checkpoint — careful not to overwrite it!
    latest = osp.join(weights, 'latest.pt')

    torch.backends.cudnn.benchmark = True  # unsuitable for multiscale

    # Configure run.
    print("loading data")
    sys.stdout.flush()
    f = open(data_cfg)
    data_config = json.load(f)
    trainset_paths = data_config['train']
    dataset_root = data_config['root']
    f.close()
    cfg_dict = parse_model_cfg(cfg)
    img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]

    # Get dataloader.
    transforms = T.Compose([T.ToTensor()])
    dataset = JointDataset(dataset_root,
                           trainset_paths,
                           img_size,
                           augment=True,
                           transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=8,
                                             pin_memory=False,
                                             drop_last=True,
                                             collate_fn=collate_fn)

    # Initialize model.
    print("building model")
    sys.stdout.flush()
    model = Darknet(cfg_dict, dataset.nID)

    cutoff = -1  # backbone reaches to cutoff layer
    start_epoch = 0
    if resume:
        if opt.latest:
            # Resume a full checkpoint (weights + optimizer + epoch).
            latest_resume = "/home/master/kuanzi/weights/66_epoch_diou.pt"
            print("Loading the latest weight...", latest_resume)
            checkpoint = torch.load(latest_resume, map_location='cpu')

            # Load weights to resume from.
            model.load_state_dict(checkpoint['model'])
            model.cuda().train()

            # Set optimizer: split classifier vs. backbone parameter groups
            # by object id so each gets its own learning rate.
            classifer_param_value = list(
                map(id, model.classifier.parameters()))
            classifer_param = model.classifier.parameters()
            base_params = filter(
                lambda p: id(p) not in classifer_param_value,
                model.parameters())
            print("classifer_param\n", classifer_param)  # [2218660649072]
            print("classifer_param_value\n", classifer_param_value)  # [2218660649072]
            print("base_params\n", base_params)  # <filter object at 0x0000020493D95048>
            sys.stdout.flush()

            # optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=opt.lr * 0.1, momentum=.9)
            optimizer = torch.optim.SGD(
                [{
                    'params': filter(lambda x: x.requires_grad, base_params),
                    'lr': opt.lr * 0.01
                }, {
                    'params': classifer_param,
                    'lr': opt.lr
                }],
                momentum=.9)
            # optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=opt.lr, momentum=.9)

            start_epoch = checkpoint['epoch'] + 1
            if checkpoint['optimizer'] is not None:
                # Anyway, if you’re “freezing” any part of your network, and your optimizer is only passed “unfrozen” model parameters
                # (i.e. your optimizer filters out model parameters whose requires_grad is False),
                # then when resuming, you’ll need to unfreeze the network again and re-instantiate the optimizer afterwards.
                optimizer.load_state_dict(checkpoint['optimizer'])

            del checkpoint  # current, saved
        else:
            # Finetune from pretrained weights, dropping the classifier head.
            # pretrain = "/home/master/kuanzi/weights/jde_1088x608_uncertainty.pt"
            pretrain = "/home/master/kuanzi/weights/66_epoch_diou_arcface.pt"  #576x320
            print("Loading jde finetune weight...", pretrain)
            sys.stdout.flush()
            checkpoint = torch.load(pretrain, map_location='cpu')
            model_dict = model.state_dict()
            # Drop the fully-connected classifier layer.
            pretrained_dict = {
                k: v
                for k, v in checkpoint['model'].items()
                if not k.startswith("classifier")
            }
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)
            model.cuda().train()
            print("model weight loaded")
            sys.stdout.flush()

            classifer_param_value = list(
                map(id, model.classifier.parameters()))
            classifer_param = model.classifier.parameters()
            base_params = filter(
                lambda p: id(p) not in classifer_param_value,
                model.parameters())
            print("classifer_param\n", classifer_param)  # [2218660649072]
            print("classifer_param_value\n", classifer_param_value)  # [2218660649072]
            print("base_params\n", base_params)  # <filter object at 0x0000020493D95048>
            sys.stdout.flush()

            # optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad, model.parameters()), lr=opt.lr * 0.1, momentum=.9)
            optimizer = torch.optim.SGD(
                [{
                    'params': filter(lambda x: x.requires_grad, base_params),
                    'lr': opt.lr * 0.01
                }, {
                    'params': classifer_param,
                    'lr': opt.lr
                }],
                momentum=.9)

            print("chk epoch:\n", checkpoint['epoch'])
            sys.stdout.flush()
            start_epoch = checkpoint['epoch'] + 1
    else:
        # Initialize model with backbone (optional).
        print("Loading backbone...")
        sys.stdout.flush()
        if cfg.endswith('yolov3.cfg'):
            load_darknet_weights(model, osp.join(weights, 'darknet53.conv.74'))
            cutoff = 75
        elif cfg.endswith('yolov3-tiny.cfg'):
            load_darknet_weights(model,
                                 osp.join(weights, 'yolov3-tiny.conv.15'))
            cutoff = 15

        model.cuda().train()

        # Set optimizer.
        optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad,
                                           model.parameters()),
                                    lr=opt.lr,
                                    momentum=.9,
                                    weight_decay=1e-4)

    model = torch.nn.DataParallel(model)
    # Set scheduler: decay LR by 10x at 50% and 75% of planned epochs.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[int(0.5 * opt.epochs),
                    int(0.75 * opt.epochs)],
        gamma=0.1)

    # An important trick for detection: freeze bn during fine-tuning.
    if not opt.unfreeze_bn:
        for i, (name, p) in enumerate(model.named_parameters()):
            p.requires_grad = False if 'batch_norm' in name else True

    model_info(model)
    t0 = time.time()
    print("begin training...")
    sys.stdout.flush()
    for epoch in range(epochs):
        epoch += start_epoch
        logger.info(('%8s%12s' + '%10s' * 6) %
                    ('Epoch', 'Batch', 'box', 'conf', 'id', 'total',
                     'nTargets', 'time'))

        # Freeze darknet53.conv.74 for first epoch.
        if freeze_backbone and (epoch < 2):
            for i, (name, p) in enumerate(model.named_parameters()):
                if int(name.split('.')[2]) < cutoff:  # if layer < 75
                    p.requires_grad = False if (epoch == 0) else True

        ui = -1
        rloss = defaultdict(float)  # running loss
        optimizer.zero_grad()
        for i, (imgs, targets, _, _, targets_len) in enumerate(dataloader):
            if sum([len(x) for x in targets]) < 1:  # if no targets continue
                continue

            # SGD burn-in: quartic LR warm-up over the first batches.
            burnin = min(1000, len(dataloader))
            if (epoch == 0) & (i <= burnin):
                lr = opt.lr * (i / burnin)**4
                for g in optimizer.param_groups:
                    g['lr'] = lr

            # Compute loss, compute gradient, update parameters.
            loss, components = model(imgs.cuda(), targets.cuda(),
                                     targets_len.cuda())
            components = torch.mean(components.view(-1, 5), dim=0)
            loss = torch.mean(loss)
            loss.backward()

            # Accumulate gradient for x batches before optimizing.
            if ((i + 1) % accumulated_batches == 0) or (
                    i == len(dataloader) - 1):
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics.
            ui += 1
            for ii, key in enumerate(model.module.loss_names):
                rloss[key] = (rloss[key] * ui + components[ii]) / (ui + 1)

            s = ('%8s%12s' + '%10.3g' * 6) % (
                '%g/%g' % (epoch, epochs - 1),
                '%g/%g' % (i, len(dataloader) - 1), rloss['box'],
                rloss['conf'], rloss['id'], rloss['loss'], rloss['nT'],
                time.time() - t0)
            t0 = time.time()
            if i % opt.print_interval == 0:
                logger.info(s)

        # # Save latest checkpoint
        # checkpoint = {'epoch': epoch,
        #               'model': model.module.state_dict(),
        #               'optimizer': optimizer.state_dict()}
        # torch.save(checkpoint, latest)

        # Periodically save an epoch-tagged checkpoint.
        # NOTE(review): the 'latest' checkpoint save above is commented out,
        # so only these interval checkpoints are ever written.
        if epoch % opt.test_interval == 0 and epoch != 0:
            epoch_chk = osp.join(weights,
                                 str(epoch) + '_epoch_diou_arcface_resume.pt')
            checkpoint = {
                'epoch': epoch,
                'model': model.module.state_dict(),
                'optimizer': optimizer.state_dict()
            }
            torch.save(checkpoint, epoch_chk)

        # """ Training and testing are decoupled; the following runs separately """
        # with torch.no_grad():
        #     # mAP, R, P = test.test(cfg, data_cfg, weights=latest, batch_size=batch_size, print_interval=40)
        #     # print ("test.test:\t", mAP, "\t", R, "\t", P)
        #     test_mapgiou.test_giou(cfg, data_cfg, weights=latest, batch_size=batch_size, print_interval=40)
        #     test_mapgiou.test_emb(cfg, data_cfg, weights=latest, batch_size=batch_size, print_interval=40)

        # Call scheduler.step() after opimizer.step() with pytorch > 1.1.0
        scheduler.step()
def train(
        cfg,
        data_cfg,
        resume=False,
        epochs=100,
        batch_size=16,
        accumulated_batches=1,
        freeze_backbone=False,
        opt=None,
):
    """Train the JDE Darknet model (baseline variant).

    Resumes from 'weights/latest.pt' when resume=True (weights + optimizer
    state), otherwise starts from a darknet backbone. Saves 'latest.pt'
    every epoch and runs detection/embedding tests every opt.test_interval
    epochs.

    Args:
        cfg: path to the darknet model .cfg file.
        data_cfg: JSON file with 'train' and 'root' keys.
        resume / epochs / batch_size / accumulated_batches /
        freeze_backbone: usual training knobs.
        opt: argparse namespace (lr, epochs, unfreeze_bn, print_interval,
            test_interval expected).
    """
    weights = 'weights'
    mkdir_if_missing(weights)
    latest = osp.join(weights, 'latest.pt')

    torch.backends.cudnn.benchmark = True  # unsuitable for multiscale

    # Configure run.
    f = open(data_cfg)
    data_config = json.load(f)
    trainset_paths = data_config['train']
    dataset_root = data_config['root']
    f.close()
    cfg_dict = parse_model_cfg(cfg)
    img_size = [int(cfg_dict[0]['width']), int(cfg_dict[0]['height'])]

    # Get dataloader.
    transforms = T.Compose([T.ToTensor()])
    dataset = JointDataset(dataset_root,
                           trainset_paths,
                           img_size,
                           augment=True,
                           transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=8,
                                             pin_memory=False,
                                             drop_last=True,
                                             collate_fn=collate_fn)

    # Initialize model (nID = number of identities for the re-ID head).
    model = Darknet(cfg_dict, dataset.nID)

    cutoff = -1  # backbone reaches to cutoff layer
    start_epoch = 0
    if resume:
        print("Loading finetune weight...")
        checkpoint = torch.load(latest, map_location='cpu')

        # Load weights to resume from.
        model.load_state_dict(checkpoint['model'])
        model.cuda().train()

        # Set optimizer.
        optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad,
                                           model.parameters()),
                                    lr=opt.lr,
                                    momentum=.9)

        start_epoch = checkpoint['epoch'] + 1
        if checkpoint['optimizer'] is not None:
            optimizer.load_state_dict(checkpoint['optimizer'])

        del checkpoint  # current, saved
    else:
        # Initialize model with backbone (optional).
        print("Loading backbone...")
        if cfg.endswith('yolov3.cfg'):
            load_darknet_weights(model, osp.join(weights ,'darknet53.conv.74'))
            cutoff = 75
        elif cfg.endswith('yolov3-tiny.cfg'):
            load_darknet_weights(model, osp.join(weights , 'yolov3-tiny.conv.15'))
            cutoff = 15

        model.cuda().train()

        # Set optimizer.
        optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad,
                                           model.parameters()),
                                    lr=opt.lr,
                                    momentum=.9,
                                    weight_decay=1e-4)

    model = torch.nn.DataParallel(model)
    # Set scheduler: decay LR by 10x at 50% and 75% of planned epochs.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[int(0.5*opt.epochs), int(0.75*opt.epochs)],
        gamma=0.1)

    # An important trick for detection: freeze bn during fine-tuning.
    if not opt.unfreeze_bn:
        for i, (name, p) in enumerate(model.named_parameters()):
            p.requires_grad = False if 'batch_norm' in name else True

    model_info(model)
    t0 = time.time()
    for epoch in range(epochs):
        epoch += start_epoch
        logger.info(('%8s%12s' + '%10s' * 6) % (
            'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets',
            'time'))

        # Freeze darknet53.conv.74 for first epoch.
        if freeze_backbone and (epoch < 2):
            for i, (name, p) in enumerate(model.named_parameters()):
                if int(name.split('.')[2]) < cutoff:  # if layer < 75
                    p.requires_grad = False if (epoch == 0) else True

        ui = -1
        rloss = defaultdict(float)  # running loss
        optimizer.zero_grad()
        for i, (imgs, targets, _, _, targets_len) in enumerate(dataloader):
            if sum([len(x) for x in targets]) < 1:  # if no targets continue
                continue

            # SGD burn-in: quartic LR warm-up over the first batches.
            burnin = min(1000, len(dataloader))
            if (epoch == 0) & (i <= burnin):
                lr = opt.lr * (i / burnin) **4
                for g in optimizer.param_groups:
                    g['lr'] = lr

            # Compute loss, compute gradient, update parameters.
            loss, components = model(imgs.cuda(), targets.cuda(),
                                     targets_len.cuda())
            components = torch.mean(components.view(-1, 5),dim=0)
            loss = torch.mean(loss)
            loss.backward()

            # Accumulate gradient for x batches before optimizing.
            if ((i + 1) % accumulated_batches == 0) or (
                    i == len(dataloader) - 1):
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics.
            ui += 1
            for ii, key in enumerate(model.module.loss_names):
                rloss[key] = (rloss[key] * ui + components[ii]) / (ui + 1)
            s = ('%8s%12s' + '%10.3g' * 6) % (
                '%g/%g' % (epoch, epochs - 1),
                '%g/%g' % (i, len(dataloader) - 1), rloss['box'],
                rloss['conf'], rloss['id'],rloss['loss'], rloss['nT'],
                time.time() - t0)
            t0 = time.time()
            if i % opt.print_interval == 0:
                logger.info(s)

        # Save latest checkpoint.
        checkpoint = {'epoch': epoch,
                      'model': model.module.state_dict(),
                      'optimizer': optimizer.state_dict()}
        torch.save(checkpoint, latest)

        # Calculate mAP / embedding metrics on the freshly saved checkpoint.
        # NOTE(review): this also fires at epoch 0 (no `epoch != 0` guard,
        # unlike the sibling train() variants) — confirm that is intended.
        if epoch % opt.test_interval ==0:
            with torch.no_grad():
                mAP, R, P = test.test(cfg, data_cfg, weights=latest,
                                      batch_size=batch_size,
                                      print_interval=40)
                mAP, R, P = test_metrics.test_AP_iou(cfg, data_cfg,
                                                     weights=latest,
                                                     batch_size=batch_size,
                                                     print_interval=40)
                mAP, R, P = test_metrics.test_AP_giou(cfg, data_cfg,
                                                      weights=latest,
                                                      batch_size=batch_size,
                                                      print_interval=40)
                test.test_emb(cfg, data_cfg, weights=latest,
                              batch_size=batch_size, print_interval=40)

        # Call scheduler.step() after opimizer.step() with pytorch > 1.1.0
        scheduler.step()
def train(
        save_path,
        save_every,
        img_size,
        resume,
        epochs,
        batch_size,
        accumulated_batches,
        opt=None
):
    """Train the Jde_RCNN tracker (ResNet-FPN backbone variant) on MOT16.

    Builds train/val splits from MOT16 sequences (plus extra detection
    datasets when opt.all_datasets), trains with SGD + warm-up scheduler,
    evaluates every 3 epochs, and checkpoints 'latest.pt' each epoch plus
    an epoch-tagged file every `save_every` epochs. Per-epoch losses are
    appended to a JSON log.

    Args:
        save_path: root directory for model folders.
        save_every: epoch interval for tagged checkpoints.
        img_size: (width, height) input size.
        resume: continue from '<weights_path>/latest.pt'.
        epochs / batch_size / accumulated_batches: training knobs.
        opt: argparse namespace (gpu, backbone_name, lr, all_datasets,
            model_version, len_embed expected).
    """
    # Pin the visible GPU(s) before any CUDA work happens.
    os.environ['CUDA_VISIBLE_DEVICES']=opt.gpu
    model_name = opt.backbone_name + '_img_size' + str(img_size[0]) + '_' + str(img_size[1])
    weights_path = osp.join(save_path, model_name)
    loss_log_path = osp.join(weights_path, 'loss.json')
    mkdir_if_missing(weights_path)
    # Persisted model config (dumped to model.yaml for fresh runs).
    cfg = {}
    cfg['width'] = img_size[0]
    cfg['height'] = img_size[1]
    cfg['backbone_name'] = opt.backbone_name
    cfg['lr'] = opt.lr
    if resume:
        latest_resume = osp.join(weights_path, 'latest.pt')

    torch.backends.cudnn.benchmark = True
    # root = '/home/hunter/Document/torch'
    root = '/data/dgw'

    # Dataset file lists: MOT16 tracking sequences, plus extra detection
    # datasets (CT/ETH/PRW/CityPersons/CUHK) when opt.all_datasets is set.
    if opt.all_datasets:
        paths_trainset = {'02':'./data/track/train/MOT16-02.txt',
                          '04':'./data/track/train/MOT16-04.txt',
                          '05':'./data/track/train/MOT16-05.txt',
                          '09':'./data/track/train/MOT16-09.txt',
                          '10':'./data/track/train/MOT16-10.txt',
                          '11':'./data/track/train/MOT16-11.txt',
                          '13':'./data/track/train/MOT16-13.txt',
                          'CT':'./data/detect/CT_train.txt',
                          'ETH':'./data/detect/ETH.txt',
                          'PRW':'./data/detect/PRW_train.txt',
                          'CP':'./data/detect/cp_train.txt',
                          'CS':'./data/detect/CUHK_train.txt'}
        paths_valset = {'02':'./data/track/val/MOT16-02.txt',
                        '04':'./data/track/val/MOT16-04.txt',
                        '05':'./data/track/val/MOT16-05.txt',
                        '09':'./data/track/val/MOT16-09.txt',
                        '10':'./data/track/val/MOT16-10.txt',
                        '11':'./data/track/val/MOT16-11.txt',
                        '13':'./data/track/val/MOT16-13.txt',
                        'CP':'./data/detect/cp_val.txt',
                        'PRW':'./data/detect/PRW_val.txt',
                        'CT':'./data/detect/CT_val.txt',
                        'CS':'./data/detect/CUHK_val.txt'}
    else:
        paths_trainset = {'02':'./data/track/train/MOT16-02.txt',
                          '04':'./data/track/train/MOT16-04.txt',
                          '05':'./data/track/train/MOT16-05.txt',
                          '09':'./data/track/train/MOT16-09.txt',
                          '10':'./data/track/train/MOT16-10.txt',
                          '11':'./data/track/train/MOT16-11.txt',
                          '13':'./data/track/train/MOT16-13.txt'}
        paths_valset = {'02':'./data/track/val/MOT16-02.txt',
                        '04':'./data/track/val/MOT16-04.txt',
                        '05':'./data/track/val/MOT16-05.txt',
                        '09':'./data/track/val/MOT16-09.txt',
                        '10':'./data/track/val/MOT16-10.txt',
                        '11':'./data/track/val/MOT16-11.txt',
                        '13':'./data/track/val/MOT16-13.txt'}
    transforms = T.Compose([T.ToTensor()])
    trainset = JointDataset(root=root, paths=paths_trainset,
                            img_size=img_size, augment=True,
                            transforms=transforms)
    valset = JointDataset(root=root, paths=paths_valset, img_size=img_size,
                          augment=False, transforms=transforms)

    dataloader_trainset = torch.utils.data.DataLoader(trainset,
                                                      batch_size=batch_size,
                                                      shuffle=True,
                                                      num_workers=8,
                                                      pin_memory=True,
                                                      drop_last=True,
                                                      collate_fn=collate_fn)
    dataloader_valset = torch.utils.data.DataLoader(valset,
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    num_workers=8,
                                                    pin_memory=True,
                                                    drop_last=True,
                                                    collate_fn=collate_fn)
    cfg['num_ID'] = trainset.nID
    backbone = resnet_fpn_backbone(opt.backbone_name, True)
    backbone.out_channels = 256

    model = Jde_RCNN(backbone, num_ID=trainset.nID, min_size=img_size[1],
                     max_size=img_size[0], version=opt.model_version,
                     len_embeddings=opt.len_embed)
    model.cuda().train()
    # model = torch.nn.DataParallel(model)
    start_epoch = 0

    optimizer = torch.optim.SGD(filter(lambda x: x.requires_grad,
                                       model.parameters()),
                                lr=opt.lr, momentum=.9, weight_decay=5e-4)
    # Warm up LR by 10x over 10 epochs, then step-decay by 0.1 every 10 epochs.
    after_scheduler = StepLR(optimizer, 10, 0.1)
    scheduler = GradualWarmupScheduler(optimizer, multiplier=10,
                                       total_epoch=10,
                                       after_scheduler=after_scheduler)

    if resume:
        checkpoint = torch.load(latest_resume, map_location='cpu')

        # Load weights to resume from (printed to show missing/unexpected keys).
        print(model.load_state_dict(checkpoint['model'],strict=False))

        start_epoch = checkpoint['epoch_det']
        del checkpoint  # current, saved
    else:
        # Fresh run: persist the model config next to the weights.
        with open(osp.join(weights_path,'model.yaml'), 'w+') as f:
            yaml.dump(cfg, f)

    for epoch in range(epochs):
        # Evaluate on the val set every 3 epochs before training this epoch.
        model.cuda().eval()
        with torch.no_grad():
            if epoch%3==0:
                # NOTE(review): the [-1] result is computed and discarded —
                # presumably only the printed metrics are of interest.
                test_emb(model, dataloader_valset, print_interval=50)[-1]
                test(model, dataloader_valset, conf_thres=0.5,
                     iou_thres=0.2, print_interval=50)
        scheduler.step(epoch+start_epoch)
        model.cuda().train()
        print('lr: ', optimizer.param_groups[0]['lr'])
        loss_epoch_log = dict(loss_total=0, loss_classifier=0,
                              loss_box_reg=0, loss_reid=0,
                              loss_objectness=0, loss_rpn_box_reg=0)
        for i, (imgs, labels, _, _,
                targets_len) in enumerate(tqdm(dataloader_trainset)):
            targets = []
            imgs = imgs.cuda()
            labels = labels.cuda()
            flag = False
            for target_len, label in zip(targets_len.view(-1,), labels):
                ## convert the input to demanded format
                target = {}
                # Skip the whole batch if any image has no targets or only
                # unlabelled (-1 id) targets.
                if target_len==0:
                    flag = True
                if torch.all(label[0:int(target_len), 1]==-1):
                    flag = True
                target['boxes'] = label[0:int(target_len), 2:6]
                target['ids'] = (label[0:int(target_len), 1]).long()
                target['labels'] = torch.ones_like(target['ids'])
                targets.append(target)
            if flag:
                continue
            losses = model(imgs, targets)
            # Weighted sum of detection losses plus 0.4 * re-ID loss.
            loss = losses['loss_classifier'] + losses['loss_box_reg'] \
                + losses['loss_objectness'] + losses['loss_rpn_box_reg'] \
                + 0.4*losses['loss_reid']
            loss.backward()
            # Accumulate gradient for x batches before optimizing.
            if ((i + 1) % accumulated_batches == 0) or (
                    i == len(dataloader_trainset) - 1):
                optimizer.step()
                optimizer.zero_grad()
            ## print and log the loss
            for key, val in losses.items():
                loss_epoch_log[key] = float(val) + loss_epoch_log[key]
        # Average the accumulated losses over the epoch.
        # NOTE(review): divides by the last batch index `i` (not i+1, and not
        # the number of non-skipped batches) — confirm this approximation is
        # acceptable.
        for key, val in loss_epoch_log.items():
            loss_epoch_log[key] =loss_epoch_log[key]/i
        print("loss in epoch %d: "%(epoch))
        print(loss_epoch_log)

        # Checkpoint: always refresh latest.pt; tag a copy every save_every epochs.
        epoch_det = epoch + start_epoch
        epoch_reid = epoch + start_epoch
        checkpoint = {'epoch_det': epoch_det,
                      'epoch_reid': epoch_reid,
                      'model': model.state_dict()
                      }
        latest = osp.join(weights_path, 'latest.pt')
        torch.save(checkpoint, latest)
        if epoch % save_every == 0 and epoch != 0:
            torch.save(checkpoint,
                       osp.join(weights_path,
                                "weights_epoch_" + str(epoch_det) + '_' +
                                str(epoch_reid) + ".pt"))
        # Append this epoch's losses to the JSON loss log.
        with open(loss_log_path, 'a+') as f:
            f.write('epoch_det:'+str(epoch_det)+',epoch_reid:'+str(epoch_reid)+'\n')
            json.dump(loss_epoch_log, f)
            f.write('\n')
def train(
        cfg,
        data_cfg,
        weights_from="",
        weights_to="",
        save_every=10,
        img_size=(1088, 608),
        resume=False,
        epochs=100,
        batch_size=16,
        accumulated_batches=1,
        freeze_backbone=False,
        opt=None,
):
    """Set up a JDE training run and optionally restore state from a checkpoint.

    Builds the run directory, the joint dataset/dataloader and the Darknet
    model; when ``resume`` is True, reloads model and optimizer state from
    ``weights_from/latest.pt``.

    Args:
        cfg: path to the darknet model .cfg file.
        data_cfg: path to the JSON data config (the ccmcpe.json file in cfg/),
            read for its 'train' and 'root' keys.
        weights_from: directory holding 'latest.pt' when resuming.
        weights_to: base directory under which a timestamped run dir is made.
        save_every, img_size, epochs, batch_size, accumulated_batches,
        freeze_backbone: training hyper-parameters (accepted for interface
            compatibility; not all are used in the code visible here).
        resume: restore model/optimizer state from 'latest.pt' when True.
        opt: parsed argparse namespace; ``opt.lr`` is read here.

    NOTE(review): as written this function only performs setup and the resume
    branch — no training loop follows, and a later ``train`` definition in
    this file shadows this one. Confirm which definition is intended.
    """
    # Timestamped run directory name, e.g. weights_to/run<DD_MM_HH_MM>.
    timme = strftime("%Y-%d-%m %H:%M:%S", gmtime())
    timme = timme[5:-3].replace('-', '_')
    timme = timme.replace(' ', '_')
    timme = timme.replace(':', '_')
    weights_to = osp.join(weights_to, 'run' + timme)  # osp is os.path module
    mkdir_if_missing(weights_to)  # make directory only if it does not exist
    if resume:
        latest_resume = osp.join(weights_from, 'latest.pt')

    torch.backends.cudnn.benchmark = True  # unsuitable for multiscale
    # NOTE on benchmark mode: when the input size does not change across
    # iterations, benchmark mode typically leads to faster runtime. If the
    # input size varies across iterations, performance may get worse.

    # Configure run.
    # Fix: context manager guarantees the file is closed even if json.load raises.
    with open(data_cfg) as f:  # 'f' is the ccmcpe.json file present in cfg/ directory
        data_config = json.load(f)
    trainset_paths = data_config['train']
    dataset_root = data_config['root']

    # Convert numpy images to torch tensors.
    transforms = T.Compose([T.ToTensor()])

    # Get dataloader.
    # pin_memory speeds host->GPU copies; drop_last discards the final
    # incomplete batch; collate_fn merges samples into a mini-batch.
    dataset = JointDataset(dataset_root, trainset_paths, img_size,
                           augment=True, transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             shuffle=True, num_workers=8,
                                             pin_memory=True, drop_last=True,
                                             collate_fn=collate_fn)

    # Initialize model; dataset.nID sizes the re-ID embedding head.
    model = Darknet(cfg, dataset.nID)

    cutoff = -1  # backbone reaches to cutoff layer
    start_epoch = 0
    if resume:
        # Restore a previously saved training state.
        checkpoint = torch.load(latest_resume, map_location='cpu')

        # Load weights to resume from.
        model.load_state_dict(checkpoint['model'])
        model.cuda().train()

        # Set optimizer - SGD over only the parameters that require grad.
        optimizer = torch.optim.SGD(
            filter(lambda x: x.requires_grad, model.parameters()),
            lr=opt.lr, momentum=.9)  # lr: learning rate

        start_epoch = checkpoint['epoch'] + 1
        if checkpoint['optimizer'] is not None:
            optimizer.load_state_dict(checkpoint['optimizer'])

        del checkpoint  # free memory; state now lives in model/optimizer
def train(
        cfg,
        data_cfg,
        weights_from="",
        weights_to="",
        save_every=10,
        img_size=(1088, 608),
        resume=False,
        epochs=100,
        batch_size=16,
        accumulated_batches=1,
        freeze_backbone=False,
        opt=None,
):
    """Train the JDE Darknet model with DistributedDataParallel.

    Builds the dataset/dataloader and model, optionally resumes from
    ``weights_from/latest.pt``, then runs ``epochs`` epochs of SGD with
    gradient accumulation, burn-in LR warmup, MultiStepLR decay, periodic
    checkpointing to ``weights_to`` and periodic evaluation via
    ``test.test`` / ``test.test_emb``.

    Args:
        cfg: path to the darknet model .cfg file.
        data_cfg: path to the JSON data config ('train' and 'root' keys).
        weights_from: directory with pretrain/backbone weights or 'latest.pt'.
        weights_to: base directory; a timestamped run dir is created inside.
        save_every: save a numbered (optimizer-stripped) checkpoint every N epochs.
        img_size: (width, height) of network input.
        resume: restore model/optimizer/epoch state when True.
        epochs, batch_size, accumulated_batches: training schedule parameters.
        freeze_backbone: freeze backbone layers below ``cutoff`` for epoch 0.
        opt: argparse namespace; reads lr, epochs, unfreeze_bn, print_interval,
            test_interval.
    """
    # The function starts: build a timestamped run directory.
    timme = strftime("%Y-%d-%m %H:%M:%S", gmtime())
    timme = timme[5:-3].replace('-', '_')
    timme = timme.replace(' ', '_')
    timme = timme.replace(':', '_')
    weights_to = osp.join(weights_to, 'run' + timme)
    mkdir_if_missing(weights_to)
    if resume:
        latest_resume = osp.join(weights_from, 'latest.pt')

    torch.backends.cudnn.benchmark = True  # unsuitable for multiscale

    # Configure run.
    # Fix: context manager closes the file even if json.load raises.
    with open(data_cfg) as f:
        data_config = json.load(f)
    trainset_paths = data_config['train']
    dataset_root = data_config['root']

    transforms = T.Compose([T.ToTensor()])

    # Get dataloader
    dataset = JointDataset(dataset_root, trainset_paths, img_size,
                           augment=True, transforms=transforms)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             shuffle=True, num_workers=8,
                                             pin_memory=True, drop_last=True,
                                             collate_fn=collate_fn)
    print("batch size","======",batch_size)

    # Initialize model; dataset.nID sizes the re-ID classifier head.
    model = Darknet(cfg, dataset.nID)

    cutoff = -1  # backbone reaches to cutoff layer
    start_epoch = 0
    if resume:
        checkpoint = torch.load(latest_resume, map_location='cpu')

        # Load weights to resume from
        model.load_state_dict(checkpoint['model'])
        model.cuda().train()

        # Set optimizer
        optimizer = torch.optim.SGD(
            filter(lambda x: x.requires_grad, model.parameters()),
            lr=opt.lr, momentum=.9)

        start_epoch = checkpoint['epoch'] + 1
        if checkpoint['optimizer'] is not None:
            optimizer.load_state_dict(checkpoint['optimizer'])

        del checkpoint  # current, saved
    else:
        # Initialize model with backbone (optional)
        if cfg.endswith('yolov3.cfg'):
            load_darknet_weights(model, osp.join(weights_from, 'darknet53.conv.74'))
            cutoff = 75
        elif cfg.endswith('yolov3-tiny.cfg'):
            load_darknet_weights(model, osp.join(weights_from, 'yolov3-tiny.conv.15'))
            cutoff = 15

        model.cuda().train()

        # Set optimizer
        optimizer = torch.optim.SGD(
            filter(lambda x: x.requires_grad, model.parameters()),
            lr=opt.lr, momentum=.9, weight_decay=1e-4)

    #model = torch.nn.DataParallel(model)
    torch.distributed.init_process_group(backend="nccl")
    model = torch.nn.parallel.DistributedDataParallel(model, find_unused_parameters=True)

    # Set scheduler: decay LR by 10x at 50% and 75% of the planned epochs.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=[int(0.5 * opt.epochs), int(0.75 * opt.epochs)],
        gamma=0.1)

    # An important trick for detection: freeze bn during fine-tuning
    if not opt.unfreeze_bn:
        for i, (name, p) in enumerate(model.named_parameters()):
            p.requires_grad = False if 'batch_norm' in name else True

    # model_info(model)

    # Fix: copying the cfg files is loop-invariant — do it once, not per epoch,
    # and use makedirs(exist_ok=True) instead of an exists()+mkdir race.
    cfg_dir = osp.join(weights_to, 'cfg')
    os.makedirs(cfg_dir, exist_ok=True)
    copyfile(cfg, osp.join(cfg_dir, 'yolov3.cfg'))
    copyfile(data_cfg, osp.join(cfg_dir, 'ccmcpe.json'))

    # Fix: burn-in length is loop-invariant — hoist out of the batch loop.
    burnin = min(1000, len(dataloader))

    t0 = time.time()
    for epoch in range(epochs):
        epoch += start_epoch
        logger.info(('%8s%12s' + '%10s' * 6) % (
            'Epoch', 'Batch', 'box', 'conf', 'id', 'total', 'nTargets', 'time'))

        # Freeze darknet53.conv.74 for first epoch
        if freeze_backbone and (epoch < 2):
            for i, (name, p) in enumerate(model.named_parameters()):
                if int(name.split('.')[2]) < cutoff:  # if layer < 75
                    p.requires_grad = False if (epoch == 0) else True

        ui = -1
        rloss = defaultdict(float)  # running loss
        optimizer.zero_grad()
        for i, (imgs, targets, _, _, targets_len) in enumerate(dataloader):
            if sum(len(x) for x in targets) < 1:  # if no targets continue
                continue

            # SGD burn-in: quartic LR ramp over the first `burnin` batches.
            # Fix: boolean `and` instead of bitwise `&`.
            if (epoch == 0) and (i <= burnin):
                lr = opt.lr * (i / burnin) ** 4
                for g in optimizer.param_groups:
                    g['lr'] = lr

            # Compute loss, compute gradient, update parameters
            loss, components = model(imgs.cuda(), targets.cuda(), targets_len.cuda())
            components = torch.mean(components.view(-1, 5), dim=0)
            loss = torch.mean(loss)
            loss.backward()

            # accumulate gradient for x batches before optimizing
            if ((i + 1) % accumulated_batches == 0) or (i == len(dataloader) - 1):
                optimizer.step()
                optimizer.zero_grad()

            # Running epoch-means of tracked metrics.
            ui += 1
            for ii, key in enumerate(model.module.loss_names):
                rloss[key] = (rloss[key] * ui + components[ii]) / (ui + 1)

            # rloss holds running loss values, mean-updated every batch.
            s = ('%8s%12s' + '%10.3g' * 6) % (
                '%g/%g' % (epoch, epochs - 1),
                '%g/%g' % (i, len(dataloader) - 1),
                rloss['box'], rloss['conf'], rloss['id'],
                rloss['loss'], rloss['nT'], time.time() - t0)
            t0 = time.time()
            if i % opt.print_interval == 0:
                logger.info(s)

        # Save latest checkpoint
        checkpoint = {'epoch': epoch,
                      'model': model.module.state_dict(),
                      'optimizer': optimizer.state_dict()}

        latest = osp.join(weights_to, 'latest.pt')
        torch.save(checkpoint, latest)
        if epoch % save_every == 0 and epoch != 0:
            # making the checkpoint lite: drop optimizer state from the
            # numbered snapshot (latest.pt keeps the full state).
            checkpoint["optimizer"] = []
            torch.save(checkpoint,
                       osp.join(weights_to, "weights_epoch_" + str(epoch) + ".pt"))

        # Calculate mAP
        if epoch % opt.test_interval == 0:
            with torch.no_grad():
                mAP, R, P = test.test(cfg, data_cfg, weights=latest,
                                      batch_size=batch_size, print_interval=40)
                test.test_emb(cfg, data_cfg, weights=latest,
                              batch_size=batch_size, print_interval=40)

        # Call scheduler.step() after optimizer.step() with pytorch > 1.1.0
        scheduler.step()