def forward(self, predictions, targets):
    """Multibox Loss
    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape: torch.size(batch_size, num_priors, 4)
            priors shape: torch.size(num_priors, 4)
        targets (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size, num_objs, 5] (last idx is the label).
    """
    loc_data, conf_data, priors = predictions
    num = loc_data.size(0)
    priors = priors[:loc_data.size(1), :]
    num_priors = priors.size(0)
    num_classes = self.num_classes

    # Match priors (default boxes) and ground truth boxes.
    loc_t = torch.Tensor(num, num_priors, 4)
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):
        target = targets[idx]
        truths = target[:, :-1].data
        labels = target[:, -1].data
        defaults = priors.data
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
    if self.use_gpu:
        loc_t = loc_t.cuda()
        conf_t = conf_t.cuda()
    # Wrap targets.
    loc_t = Variable(loc_t, requires_grad=False)
    conf_t = Variable(conf_t, requires_grad=False)

    pos = conf_t > 0
    num_pos = pos.sum(dim=1, keepdim=True)

    # Localization loss (smooth L1), over positive priors only.
    # Shape: [batch, num_priors, 4]
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)
    loc_t = loc_t[pos_idx].view(-1, 4)
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # Compute max conf across batch for hard negative mining.
    batch_conf = conf_data.view(-1, self.num_classes)
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))

    # Hard negative mining: reshape first, then zero out the positives.
    loss_c = loss_c.view(num, -1)
    loss_c[pos] = 0  # filter out pos boxes for now
    _, loss_idx = loss_c.sort(1, descending=True)
    _, idx_rank = loss_idx.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # Confidence loss including positive and negative examples.
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]
    if USE_FL:
        # Per-class alpha: 0.25 for the background class, 0.75 for the
        # remaining 20 classes.
        alpha = torch.Tensor(np.array([[0.25]] + [[0.75]] * 20))
        compute_c_loss = focal_loss.FocalLoss(alpha=alpha, gamma=2,
                                              class_num=num_classes,
                                              size_average=False)
        loss_c = compute_c_loss(conf_p, targets_weighted)
    else:
        loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x, c, l, g) = (Lconf(x, c) + alpha * Lloc(x, l, g)) / N
    N = num_pos.data.sum()
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
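# The confidence term above calls a log_sum_exp helper that is not defined
# in this section. Below is a minimal, numerically stable sketch consistent
# with the call site (per-row log-sum-exp over class scores, returning
# [N, 1] so the gather() result broadcasts against it) -- an assumption
# about the helper, not necessarily the repository's exact code.
import torch

def log_sum_exp(x):
    # x: [N, num_classes] raw class scores.
    # Subtract the global max before exp() to avoid overflow, add it back after.
    x_max = x.data.max()
    return torch.log(torch.sum(torch.exp(x - x_max), 1, keepdim=True)) + x_max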
def forward(self, predictions, targets):
    """Multibox Loss
    Args:
        predictions (tuple): A tuple containing loc preds, conf preds,
            and prior boxes from SSD net.
            conf shape: torch.size(batch_size, num_priors, num_classes)
            loc shape: torch.size(batch_size, num_priors, 4)
            priors shape: torch.size(num_priors, 4)
        targets (tensor): Ground truth boxes and labels for a batch,
            shape: [batch_size, num_objs, 5] (last idx is the label).
    """
    # Annotated variant of the same forward pass, with FocalLoss used
    # unconditionally for the confidence term.
    # predictions holds the net's predicted locations, its predicted class
    # scores, and all prior (default) boxes.
    loc_data, conf_data, priors = predictions
    num = loc_data.size(0)                 # number of images per batch
    priors = priors[:loc_data.size(1), :]  # all prior boxes, [8732, 4] (appears to be a no-op here)
    num_priors = priors.size(0)            # 8732 anchors
    num_classes = self.num_classes

    # Match priors (default boxes) and ground truth boxes.
    # loc_t and conf_t start as uninitialized buffers that match() fills in.
    # loc_t [batch_size, 8732, 4]: each image has 8732 priors, each with
    # four values [center x, center y, width, height].
    loc_t = torch.Tensor(num, num_priors, 4)
    # conf_t [batch_size, 8732]: the class of each default box; class 0
    # marks a negative (background) sample.
    conf_t = torch.LongTensor(num, num_priors)
    for idx in range(num):  # iterate over the images in the batch
        # Each target row is a 5-dim tensor: the first four entries are the
        # box position, the last is the label.
        truths = targets[idx][:, :-1].data  # ground-truth box positions
        labels = targets[idx][:, -1].data   # ground-truth class labels
        # Default-box coordinates are fixed at a given scale, independent
        # of the batch.
        defaults = priors.data  # [8732, 4]
        # match(threshold, ground truth, prior boxes, variances, labels,
        #       location buffer, class buffer, image index):
        # assigns the best prior to each ground truth and the best ground
        # truth to each prior, then writes the encoded offsets
        # ([g_cx, g_cy, g_w, g_h]) into loc_t and the top class label for
        # each prior into conf_t, in place.
        match(self.threshold, truths, defaults, self.variance, labels,
              loc_t, conf_t, idx)
        # After matching: loc_t is [batch_size, 8732, 4] (encoded positions),
        #                 conf_t is [batch_size, 8732] (class labels).
    if self.use_gpu:
        # Move the encoded positions and labels to the GPU.
        loc_t = loc_t.cuda()    # all images' position info, [batch_size, 8732, 4]
        conf_t = conf_t.cuda()  # [batch_size, 8732]
    # Wrap targets.
    loc_t = Variable(loc_t, requires_grad=False)    # encoded offsets to learn
    conf_t = Variable(conf_t, requires_grad=False)  # top class label per prior (the target values)

    # Only labels > 0 are treated as objects; 0 is background.
    # pos is a bool tensor, [batch_size, 8732].
    pos = conf_t > 0
    # Per-image count of priors that contain an object, i.e. the number of
    # priors selected as positive samples.
    num_pos = pos.sum(dim=1, keepdim=True)

    # Localization loss (smooth L1): positives only; loc_data is the
    # predicted tensor. Shape: [batch, num_priors, 4].
    # pos_idx is bool, [batch_size, 8732, 4]: True where a prior holds an
    # object, False for background. unsqueeze appends a size-1 dim, then
    # expand broadcasts [batch_size, 8732] -> [batch_size, 8732, 4].
    pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
    loc_p = loc_data[pos_idx].view(-1, 4)  # net-predicted offsets for object regions, [num positives, 4]
    loc_t = loc_t[pos_idx].view(-1, 4)     # offsets encoded from the actual GT
    # Smooth L1 between the encoded ground truth and the net's position
    # predictions gives the localization loss loss_l.
    loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

    # ----------------------- Hard negative mining -----------------------
    # conf_data: torch.size(batch_size, num_priors, num_classes)
    batch_conf = conf_data.view(-1, self.num_classes)  # [batch_size*8732, num_classes], all priors in the batch
    # Per-prior confidence loss, encoded as in the paper:
    #   conf_t.view(-1, 1): [batch_size*8732, 1], the GT-matched label per prior
    #   batch_conf:         [batch_size*8732, num_classes], per-class scores
    loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
    # loss_c: torch.Size([batch_size*8732, 1])

    # Reshape before indexing. (This line and the next were swapped relative
    # to the original code, which zeroed the positives before the view.)
    loss_c = loss_c.view(num, -1)  # [batch_size, 8732]
    # Zero the positives' loss; only background samples keep a nonzero loss.
    loss_c[pos] = 0
    # First sort: per-row losses in descending order; loss_idx holds each
    # sorted element's index in the original row.
    _, loss_idx = loss_c.sort(1, descending=True)
    # Second sort: idx_rank [batch_size, 8732] maps each prior to the rank
    # of its loss within the row (largest loss -> rank 0).
    _, idx_rank = loss_idx.sort(1)
    # Summary: positives come from IoU matching between default boxes and
    # ground truth; negatives are picked by ranking the classification loss.
    # Cast pos from bool to long; num_pos [batch_size, 1] counts the
    # object-containing priors per image.
    num_pos = pos.long().sum(1, keepdim=True)
    # negpos_ratio * num_pos keeps three negatives per positive;
    # max=pos.size(1)-1 caps the negatives below the per-image prior count.
    num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)  # [batch_size, 1]
    # num_pos and num_neg are both [batch_size, 1]: per-image positive and
    # negative counts at a 1:3 ratio.
    # neg (bool, [batch_size, 8732]): True for each image's top-ranked
    # negatives, up to that image's negative budget.
    neg = idx_rank < num_neg.expand_as(idx_rank)

    # The confidence loss covers positive and negative samples alike.
    # pos and neg are bool, so pos_idx and neg_idx are bool too, both
    # [batch_size, 8732, num_classes].
    pos_idx = pos.unsqueeze(2).expand_as(conf_data)
    neg_idx = neg.unsqueeze(2).expand_as(conf_data)
    # conf_p [selected priors, num_classes]: the net's per-class predictions
    # for every selected prior; positives and negatives both enter the loss.
    conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
    targets_weighted = conf_t[(pos + neg).gt(0)]  # GT labels for the same priors

    # Classification loss loss_c:
    #   arg 1: conf_p, the net's per-class prediction for each selected prior
    #   arg 2: targets_weighted, the stored labels (long)
    # FocalLoss targets the classification term. It addresses (1) the
    # positive/negative imbalance and (2) the unequal contribution of easy
    # and hard samples to the loss.
    # ---------------------------------------------------------------------
    compute_c_loss = focal_loss.FocalLoss(alpha=None, gamma=2,
                                          class_num=num_classes,
                                          size_average=False)
    loss_c = compute_c_loss(conf_p, targets_weighted)
    # The original loss below; uncomment it (and drop the FocalLoss lines
    # above) to revert.
    # loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum')

    # Sum of losses: L(x, c, l, g) = (Lconf(x, c) + alpha * Lloc(x, l, g)) / N
    # ---------------------------------------------------------------------
    N = num_pos.data.sum()  # total number of objects across the batch
    # Cast to double before normalizing.
    N = N.double()
    loss_l = loss_l.double()
    loss_c = loss_c.double()
    loss_l /= N
    loss_c /= N
    return loss_l, loss_c
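# The focal_loss.FocalLoss module itself is not shown in this section, and
# its constructor signature varies across the snippets below. As a point of
# reference, here is a minimal sketch matching the SSD variant above
# (alpha, gamma, class_num, size_average) and Lin et al.'s
# FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t). This is an assumption
# about the module's internals, not the repository's actual code.
import torch
import torch.nn as nn
import torch.nn.functional as F

class FocalLoss(nn.Module):
    """Multi-class focal loss sketch.

    alpha may be None (no class weighting) or a [class_num, 1] tensor of
    per-class weights, as in the callers above.
    """

    def __init__(self, alpha=None, gamma=2, class_num=21, size_average=True):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.class_num = class_num
        self.size_average = size_average

    def forward(self, inputs, targets):
        # inputs: [N, class_num] raw scores; targets: [N] long labels.
        log_p = F.log_softmax(inputs, dim=1)
        log_pt = log_p.gather(1, targets.view(-1, 1)).squeeze(1)  # log p_t
        pt = log_pt.exp()
        # Down-weight easy examples by (1 - p_t)^gamma.
        loss = -((1 - pt) ** self.gamma) * log_pt
        if self.alpha is not None:
            alpha = self.alpha.to(inputs.device).view(-1)
            loss = loss * alpha[targets]  # per-class weighting
        return loss.mean() if self.size_average else loss.sum()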
def main(args):
    Batch_Size = args.batch_size
    train_path = args.train_path
    val_path = args.val_path
    data_dir = ''

    data_transforms = {
        'train': transforms.Compose([
            # transforms.ColorJitter(brightness=0.1),
            transforms.Resize(args.scale),
            transforms.RandomResizedCrop(int(args.scale * 0.875)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize(args.scale),
            transforms.CenterCrop(int(args.scale * 0.875)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
    }

    # Per-attribute focal losses for the multi-task heads.
    criterion_focalloss_class = focal_loss.FocalLoss(ignore_label=-1, gamma=5, class_num=2059)
    criterion_focalloss_year = focal_loss.FocalLoss(ignore_label=-1, gamma=5, class_num=3031)
    criterion_focalloss_color = focal_loss.FocalLoss(ignore_label=-1, gamma=5, class_num=11)
    criterion_focalloss_type = focal_loss.FocalLoss(ignore_label=-1, gamma=5, class_num=46)

    dets = dict()
    dets['train'] = caffe_dataset.ImgListLoader(data_dir, train_path, " ",
                                                data_transforms['train'])
    dets['val'] = caffe_dataset.ImgListLoader(data_dir, val_path, " ",
                                              data_transforms['val'])

    dset_loaders = {}
    dset_loaders['train'] = torch.utils.data.DataLoader(
        dets['train'],
        batch_size=Batch_Size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True)
    dset_loaders['val'] = torch.utils.data.DataLoader(
        dets['val'],
        batch_size=8,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True)

    # Count the samples in each split. (The original read the train list for
    # both splits in an earlier, overwritten block; the val list is read from
    # val_path here.)
    train_list = []
    val_list = []
    with open(train_path) as train_img_list:
        for line in train_img_list:
            train_list.append(line)
    with open(val_path) as val_img_list:
        for line in val_img_list:
            val_list.append(line)
    dset_sizes = {'train': len(train_list), 'val': len(val_list)}

    use_gpu = torch.cuda.is_available()
    model_ft = resnext_50_multi_conv_baseline_new.resnext50_fg_car(
        pretrained=True,
        cropsize=int(args.scale * 0.875),
        model_dir=args.model_dir,
        class_num=args.class_num)

    # optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.1, momentum=0.9)
    optimizer_ft = optim.Adam(
        [{'params': model_ft.resnext_car_multitask.parameters()},
         {'params': model_ft.classifier.parameters(), 'lr': 1e-3},
         {'params': model_ft.embedding.parameters(), 'lr': 1e-3}],
        lr=1e-5)

    if use_gpu:
        model_ft = model_ft.cuda()
        model_ft_parallel = nn.DataParallel(model_ft, device_ids=[0, 1, 2, 3])

    criterion = nn.CrossEntropyLoss(ignore_index=-1)
    criterion2 = nn.BCEWithLogitsLoss()

    ######################################################################
    # Train and evaluate
    # ^^^^^^^^^^^^^^^^^^
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    args.save_dir = args.save_dir + subdir
    if not os.path.exists("./output/" + args.save_dir):
        os.makedirs("./output/" + args.save_dir)

    # exp_lr_scheduler is assumed to be defined elsewhere in this file.
    if use_gpu:
        model_best = train_model(model_ft_parallel, criterion, criterion2,
                                 optimizer_ft, exp_lr_scheduler, dset_loaders,
                                 args.epoch, dset_sizes, args)
    else:
        model_best = train_model(model_ft, criterion, criterion2,
                                 optimizer_ft, exp_lr_scheduler, dset_loaders,
                                 args.epoch, dset_sizes, args)
    torch.save(model_best.module.state_dict(),
               "./output/" + args.save_dir + "/best-train-model.pth")
def main(opts):
    # training script
    if opts.use_cuda is not None:
        cuda_dev = int(opts.use_cuda)
    else:
        cuda_dev = None
    torch.manual_seed(opts.seed)
    lr_decay = float(opts.lr_decay)
    start_epoch = None

    # initialize data loaders
    datasets = {
        p: amt_dataset.AMT_Dataset(os.getcwd() + "/" + opts.data_dir,
                                   "labels.csv", p)
        for p in ("train", "val")
    }
    dataloaders = {
        p: DataLoader(datasets[p],
                      batch_size=opts.batch_size,
                      shuffle=True,
                      num_workers=2,
                      collate_fn=lambda b: list(list(l) for l in zip(*b)))
        for p in ("train", "val")
    }
    print('\ndataset sizes:\t{}\t{}\n'.format(
        *[(p, len(d)) for (p, d) in dataloaders.items()]))

    if opts.arch == 'baseline':
        import baseline
        net = baseline.LanguageModeler(rnn_size=opts.rnn_size, rnn_layers=1)
    elif opts.arch == 'cnn':
        import amt_cnn as cnn
        net = cnn.AMT_CNN(use_cuda=opts.use_cuda, max_w=opts.max_w)

    if opts.load:
        saved_state = torch.load(opts.load, map_location='cpu')
        net.load_state_dict(saved_state)
        epoch_string = opts.load.split('epoch')[-1]
        start_epoch = extract_first_number(epoch_string)
    if cuda_dev is not None:
        net = net.cuda(cuda_dev)

    os.makedirs(opts.model_weights, exist_ok=True)
    sys.stdout.flush()

    optim = torch.optim.SGD(net.parameters(), float(opts.init_lr), momentum=0.9)
    left_pad = opts.left_pad
    fl_gamma = opts.focal_gamma

    if opts.pos_w is None:
        print('Focal loss, gamma={}'.format(fl_gamma), file=sys.stderr)
        loss_function = focal_loss.FocalLoss(gamma=fl_gamma)
    else:
        # --pos_w is either a float (the negative/positive ratio) or a path
        # to a CSV whose 'all' row holds that ratio.
        try:
            npr = float(opts.pos_w)
        except ValueError:
            try:
                with open(opts.pos_w, 'r') as fp:
                    for line in fp.readlines():
                        line = line.strip().split(',')
                        if line[0] == 'all':
                            npr = float(line[1])
            except (OSError, ValueError):
                print('error: cannot interpret --pos_w value: {}'.format(opts.pos_w),
                      file=sys.stderr)
                exit(1)
        if npr == 0.:
            pw = None
        else:
            pw = torch.ones(88) * npr  # one weight per output (88 piano keys)
        print('BCE loss, positive weight:', file=sys.stderr)
        print(pw, file=sys.stderr)
        if cuda_dev is not None and pw is not None:
            pw = pw.cuda(cuda_dev)
        loss_function = nn.BCEWithLogitsLoss(pos_weight=pw)
    sys.stderr.flush()

    train(net, dataloaders, optim, loss_function,
          start_epoch=start_epoch, num_epochs=opts.max_epochs,
          model_dir=opts.model_weights, cuda_dev=cuda_dev,
          max_w=opts.max_w, left_pad=left_pad, lr_decay=lr_decay)
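# Note on --pos_w: from the branch above, it selects the loss. Omitted ->
# focal loss; "0" -> unweighted BCE; a float such as "1.7" -> BCE with that
# pos_weight; or a CSV path whose "all" row supplies the ratio, e.g. a file
# containing:
#
#   all,1.7
#
# Hypothetical invocations (script name and exact flag spellings assumed):
#   python train.py --arch cnn --focal_gamma 2      # focal loss
#   python train.py --arch cnn --pos_w 1.7          # weighted BCE
#   python train.py --arch cnn --pos_w ratios.csv   # ratio read from CSV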
modelEb4 = EfficientNet.from_pretrained('efficientnet-b4',
                                        in_channels=3,
                                        num_classes=4)
modelEb3 = EfficientNet.from_pretrained('efficientnet-b3',
                                        in_channels=3,
                                        num_classes=4)
modelRes18 = models.resnet18(pretrained=True)
num_ftrs = modelRes18.fc.in_features
modelRes18.fc = nn.Linear(num_ftrs, 4)  # replace the last fc layer

modelEb4.to(device)
modelEb3.to(device)
modelRes18.to(device)
model = ensemble.Ensemble(modelEb4, modelEb3, modelRes18).to(device)
bind_model(model, device)

criterion = focal_loss.FocalLoss(device).to(device)
optimizerEb4 = torch.optim.Adam(modelEb4.parameters(), lr=learning_rate)
optimizerEb3 = torch.optim.Adam(modelEb3.parameters(), lr=learning_rate)
optimizerRes18 = torch.optim.Adam(modelRes18.parameters(), lr=learning_rate)

scheduler_cosineEb4 = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizerEb4, cosine_epo)
scheduler_cosineEb3 = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizerEb3, cosine_epo)
scheduler_cosineRes18 = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizerRes18, cosine_epo)

if ifpause:  # for test mode
    print('Inferring Start ...')
    nsml.paused(scope=locals())
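# ensemble.Ensemble is not shown in this section. A minimal sketch of what
# it presumably does -- average the three models' class logits -- follows;
# this is an assumption for illustration, not the repository's actual
# implementation.
import torch
import torch.nn as nn

class Ensemble(nn.Module):
    """Averages the class logits of the wrapped models (hypothetical sketch)."""

    def __init__(self, *models):
        super().__init__()
        self.models = nn.ModuleList(models)

    def forward(self, x):
        # Each sub-model maps x -> [batch, 4] logits; average across models.
        logits = torch.stack([m(x) for m in self.models], dim=0)
        return logits.mean(dim=0)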