import os
import time
import datetime
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from sklearn.metrics import label_ranking_average_precision_score

# FocalLoss, AngleLoss, Loss_1, BiTemperedLayer/BiTemperedLoss,
# save_checkpoint, evaluate_test and the WxsDsm lookup are repo-local;
# the module paths below are assumptions and may need adjusting.
from models.focal_loss import FocalLoss  # assumed path
from utils.Asoftmax_loss import AngleLoss  # assumed path


def train(Config,
          model,
          epoch_num,
          start_epoch,
          optimizer,
          exp_lr_scheduler,
          data_loader,
          save_dir,
          data_size=448,
          savepoint=500,
          checkpoint=1000):
    # savepoint: save without evaluation; checkpoint: save with evaluation
    step = 0
    eval_train_flag = False
    rec_loss = []
    checkpoint_list = []

    train_batch_size = data_loader['train'].batch_size
    train_epoch_step = data_loader['train'].__len__()
    train_loss_recorder = LossRecord(train_batch_size)

    if savepoint > train_epoch_step:
        savepoint = 1 * train_epoch_step
        checkpoint = savepoint

    date_suffix = dt()
    log_file = open(
        os.path.join(
            Config.log_folder,
            'formal_log_r50_dcl_%s_%s.log' % (str(data_size), date_suffix)),
        'a')

    add_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()
    get_focal_loss = FocalLoss()
    get_angle_loss = AngleLoss()

    for epoch in range(start_epoch, epoch_num - 1):
        exp_lr_scheduler.step(epoch)
        model.train(True)
        save_grad = []
        for batch_cnt, data in enumerate(data_loader['train']):
            step += 1
            loss = 0
            model.train(True)
            if Config.use_backbone:
                inputs, labels, img_names = data
                inputs = Variable(inputs.cuda())
                labels = Variable(torch.from_numpy(np.array(labels)).cuda())
            if Config.use_dcl:
                inputs, labels, labels_swap, swap_law, img_names = data
                inputs = Variable(inputs.cuda())
                labels = Variable(torch.from_numpy(np.array(labels)).cuda())
                labels_swap = Variable(
                    torch.from_numpy(np.array(labels_swap)).cuda())
                swap_law = Variable(
                    torch.from_numpy(np.array(swap_law)).float().cuda())

            optimizer.zero_grad()

            # a short (incomplete) last batch: feed every other image as the
            # unswapped half; otherwise the model builds the pairing itself
            if inputs.size(0) < 2 * train_batch_size:
                outputs = model(inputs, inputs[0:-1:2])
            else:
                outputs = model(inputs, None)

            if Config.use_focal_loss:
                ce_loss = get_focal_loss(outputs[0], labels)
            else:
                ce_loss = get_ce_loss(outputs[0], labels)

            if Config.use_Asoftmax:
                fetch_batch = labels.size(0)
                if batch_cnt % (train_epoch_step // 5) == 0:
                    angle_loss = get_angle_loss(outputs[3],
                                                labels[0:fetch_batch:2],
                                                decay=0.9)
                else:
                    angle_loss = get_angle_loss(outputs[3],
                                                labels[0:fetch_batch:2])
                loss += angle_loss

            loss += ce_loss

            alpha_ = 1
            beta_ = 1
            gamma_ = 0.01 if Config.dataset == 'STCAR' or Config.dataset == 'AIR' else 1
            if Config.use_dcl:
                swap_loss = get_ce_loss(outputs[1], labels_swap) * beta_
                loss += swap_loss
                law_loss = add_loss(outputs[2], swap_law) * gamma_
                loss += law_loss

            loss.backward()
            torch.cuda.synchronize()
            optimizer.step()
            torch.cuda.synchronize()

            if Config.use_dcl:
                print(
                    'step: {:-8d} / {:d} loss=ce_loss+swap_loss+law_loss: {:6.4f} = {:6.4f} + {:6.4f} + {:6.4f} '
                    .format(step, train_epoch_step, loss.detach().item(),
                            ce_loss.detach().item(),
                            swap_loss.detach().item(),
                            law_loss.detach().item()),
                    flush=True)
            if Config.use_backbone:
                # backbone-only path has a single loss term
                print(
                    'step: {:-8d} / {:d} loss=ce_loss: {:6.4f} = {:6.4f} '
                    .format(step, train_epoch_step, loss.detach().item(),
                            ce_loss.detach().item()),
                    flush=True)
            rec_loss.append(loss.detach().item())
            train_loss_recorder.update(loss.detach().item())

            # evaluation & save
            if step % checkpoint == 0:
                rec_loss = []
                print(32 * '-', flush=True)
                print(
                    'step: {:d} / {:d} global_step: {:8.2f} train_epoch: {:04d} rec_train_loss: {:6.4f}'
                    .format(step, train_epoch_step,
                            1.0 * step / train_epoch_step, epoch,
                            train_loss_recorder.get_val()),
                    flush=True)
                print('current lr:%s' % exp_lr_scheduler.get_lr(), flush=True)
                if eval_train_flag:
                    # eval_turn takes Config first (the original dropped it here)
                    trainval_acc1, trainval_acc2, trainval_acc3 = eval_turn(
                        Config, model, data_loader['trainval'], 'trainval',
                        epoch, log_file)
                    if abs(trainval_acc1 - trainval_acc3) < 0.01:
                        eval_train_flag = False

                val_acc1, val_acc2, val_acc3 = eval_turn(
                    Config, model, data_loader['val'], 'val', epoch, log_file)

                save_path = os.path.join(
                    save_dir, 'weights_%d_%d_%.4f_%.4f.pth' %
                    (epoch, batch_cnt, val_acc1, val_acc3))
                torch.cuda.synchronize()
                torch.save(model.state_dict(), save_path)
                print('saved model to %s' % (save_path), flush=True)
                torch.cuda.empty_cache()

            # save only
            elif step % savepoint == 0:
                train_loss_recorder.update(rec_loss)
                rec_loss = []
                save_path = os.path.join(
                    save_dir, 'savepoint_weights-%d-%s.pth' % (step, dt()))

                checkpoint_list.append(save_path)
                if len(checkpoint_list) == 6:
                    os.remove(checkpoint_list[0])
                    del checkpoint_list[0]
                torch.save(model.state_dict(), save_path)
                torch.cuda.empty_cache()

    log_file.close()
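# dt() and LossRecord are repo-local helpers that this section never defines.
# Minimal sketches under assumptions inferred from the call sites: dt()
# returns a timestamp string for file names, and LossRecord keeps a running
# mean of the loss (update() must accept either a float or a list, and
# get_val(init=True) must reset the accumulators). The repo's real versions
# may differ.
def dt():
    # timestamp suffix used in log and checkpoint file names
    return datetime.datetime.now().strftime('%Y%m%d_%H%M%S')


class LossRecord(object):
    def __init__(self, batch_size):
        self.rec_loss = 0.0
        self.count = 0
        self.batch_size = batch_size

    def update(self, loss):
        # accept a single loss value or a list of them
        if isinstance(loss, list):
            for l in loss:
                self.rec_loss += l
                self.count += 1
        else:
            self.rec_loss += loss
            self.count += 1

    def get_val(self, init=False):
        # running mean; optionally reset the accumulators
        pop_loss = self.rec_loss / max(self.count, 1)
        if init:
            self.rec_loss = 0.0
            self.count = 0
        return pop_loss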
def train(Config,
          model,
          epoch_num,
          start_epoch,
          optimizer,
          exp_lr_scheduler,
          data_loader,
          save_dir,
          data_size=448,
          savepoint=5000,
          checkpoint=5000):
    # savepoint: save without evaluation; checkpoint: save with evaluation
    eval_train_flag = False
    rec_loss = []
    checkpoint_list = []

    train_batch_size = data_loader['train'].batch_size
    train_epoch_step = data_loader['train'].__len__()
    train_loss_recorder = LossRecord(train_batch_size)

    if savepoint > train_epoch_step:
        savepoint = 1 * train_epoch_step
        checkpoint = savepoint

    date_suffix = dt()
    log_file = open(
        os.path.join(
            Config.log_folder,
            'formal_log_r50_dcl_%s_%s.log' % (str(data_size), date_suffix)),
        'a')

    add_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()
    get_ce_sig_loss = nn.BCELoss()  # expects sigmoid probabilities and multi-label targets
    get_focal_loss = FocalLoss()
    get_angle_loss = AngleLoss()

    step = 0
    for epoch in range(start_epoch, epoch_num - 1):
        exp_lr_scheduler.step(epoch)
        model.train(True)
        save_grad = []
        for batch_cnt, data in enumerate(data_loader['train']):
            step += 1
            loss = 0
            model.train(True)
            if Config.use_backbone:
                inputs, labels, img_names = data
                inputs = Variable(inputs.cuda())
                # multi-label targets are float vectors, not class indices
                # labels = Variable(torch.LongTensor(np.array(labels)).cuda())
                labels = Variable(torch.FloatTensor(np.array(labels)).cuda())
            if Config.use_dcl:
                inputs, labels, labels_swap, swap_law, law_index, img_names = data
                inputs = Variable(inputs.cuda())
                # labels = Variable(torch.LongTensor(np.array(labels)).cuda())
                labels = Variable(torch.FloatTensor(np.array(labels)).cuda())
                labels_swap = Variable(
                    torch.LongTensor(np.array(labels_swap)).cuda())
                # swap_law holds fractional location offsets, so build it as a
                # float tensor directly (the original LongTensor would truncate)
                swap_law = Variable(
                    torch.FloatTensor(np.array(swap_law)).cuda())

            optimizer.zero_grad()
            if inputs.size(0) < 2 * train_batch_size:
                outputs = model(inputs, inputs[0:-1:2])
            else:
                outputs = model(inputs, law_index)

            # even indices are the unswapped images (assumes batch size 10)
            idx_unswap = torch.tensor([0, 2, 4, 6, 8], dtype=torch.long).cuda()
            unswap_label = torch.index_select(labels, dim=0, index=idx_unswap)

            if Config.use_focal_loss:
                ce_loss = get_focal_loss(outputs[0], labels)
            else:
                # ce_loss = get_ce_loss(outputs[0], labels)  # single-label: batch x 200
                # multi-label classification: batch x 200
                ce_loss = get_ce_sig_loss(outputs[0], unswap_label)

            if Config.use_Asoftmax:
                fetch_batch = labels.size(0)
                if batch_cnt % (train_epoch_step // 5) == 0:
                    angle_loss = get_angle_loss(outputs[3],
                                                labels[0:fetch_batch:2],
                                                decay=0.9)
                else:
                    angle_loss = get_angle_loss(outputs[3],
                                                labels[0:fetch_batch:2])
                loss += angle_loss

            alpha_ = 1
            loss += ce_loss * alpha_

            beta_ = 0.1
            gamma_ = 0.01 if Config.dataset == 'STCAR' or Config.dataset == 'AIR' else 1
            if Config.use_dcl:
                # adversarial (swap/unswap) classification: batch x 2
                swap_loss = get_ce_loss(outputs[1], labels_swap) * beta_
                loss += swap_loss  # e.g. 0.692 * 0.1 = 0.0692
                # region alignment: batch x 49; L1 loss is the mean absolute
                # element-wise difference between input x and target y
                law_loss = add_loss(outputs[2], swap_law) * gamma_
                loss += law_loss  # e.g. 0.0683 * 1 = 0.0683

            loss.backward()
            torch.cuda.synchronize()
            optimizer.step()
            torch.cuda.synchronize()

            if Config.use_dcl:
                print(
                    'epoch:{:d}, globalstep: {:-8d}, {:d} / {:d} \n loss=ce_l+swap_l+law_l: {:6.4f} = {:6.4f} + {:6.4f} + {:6.4f} '
                    .format(epoch, step, batch_cnt, train_epoch_step,
                            loss.detach().item(), ce_loss.detach().item(),
                            swap_loss.detach().item(),
                            law_loss.detach().item()),
                    flush=True)
            if Config.use_backbone:
                print(
                    'step: {:-8d} / {:d} loss=ce_loss: {:6.4f} = {:6.4f} '
                    .format(step, train_epoch_step, loss.detach().item(),
                            ce_loss.detach().item()),
                    flush=True)
            rec_loss.append(loss.detach().item())
            train_loss_recorder.update(loss.detach().item())

            # evaluation & save
            if step % checkpoint == 0:
                rec_loss = []
                print(32 * '-', flush=True)
                print(
                    'step: {:d} / {:d} global_step: {:8.2f} train_epoch: {:04d} rec_train_loss: {:6.4f}'
                    .format(step, train_epoch_step,
                            1.0 * step / train_epoch_step, epoch,
                            train_loss_recorder.get_val()),
                    flush=True)
                print('current lr:%s' % exp_lr_scheduler.get_lr(), flush=True)

                val_acc = eval_turn(Config, model, data_loader['trainval'],
                                    'val', epoch, log_file)
                # if val_acc > 0.9:
                #     checkpoint = 500
                #     savepoint = 500
                save_path = os.path.join(
                    save_dir,
                    'weights_%d_%d_%.4f.pth' % (epoch, batch_cnt, val_acc))
                torch.cuda.synchronize()
                torch.save(model.state_dict(), save_path)
                print('saved model to %s' % (save_path), flush=True)
                torch.cuda.empty_cache()

            # save only
            elif step % savepoint == 0:
                train_loss_recorder.update(rec_loss)
                rec_loss = []
                save_path = os.path.join(
                    save_dir, 'savepoint_weights-%d-%s.pth' % (step, dt()))

                checkpoint_list.append(save_path)
                if len(checkpoint_list) == 6:
                    os.remove(checkpoint_list[0])
                    del checkpoint_list[0]
                torch.save(model.state_dict(), save_path)
                torch.cuda.empty_cache()

    log_file.close()
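# nn.BCELoss, bound to get_ce_sig_loss above, expects probabilities in
# [0, 1], so outputs[0] must already be passed through a sigmoid inside the
# model. A minimal sketch of that assumed contract (the tensors and shapes
# here are illustrative, not the repo's actual model code):
def _bce_contract_sketch():
    logits = torch.randn(5, 200)             # raw scores for 5 unswapped images
    probs = torch.sigmoid(logits)            # BCELoss input must be in [0, 1]
    targets = torch.randint(0, 2, (5, 200)).float()
    loss = nn.BCELoss()(probs, targets)
    # equivalently, and more numerically stable, on the raw logits:
    loss_logits = nn.BCEWithLogitsLoss()(logits, targets)
    return loss, loss_logits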
def eval_turn(Config, model, data_loader, val_version, epoch_num, log_file):

    model.train(False)

    val_corrects1 = 0
    val_corrects2 = 0
    val_corrects3 = 0
    val_size = data_loader.__len__()
    item_count = data_loader.total_item_len
    t0 = time.time()
    get_l1_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()

    val_batch_size = data_loader.batch_size
    val_epoch_step = data_loader.__len__()
    num_cls = data_loader.num_cls

    val_loss_recorder = LossRecord(val_batch_size)
    val_celoss_recorder = LossRecord(val_batch_size)
    print('evaluating %s ...' % val_version, flush=True)
    with torch.no_grad():
        for batch_cnt_val, data_val in enumerate(data_loader):
            inputs = Variable(data_val[0].cuda())
            labels = Variable(
                torch.from_numpy(np.array(data_val[1])).long().cuda())
            outputs = model(inputs)
            loss = 0

            ce_loss = get_ce_loss(outputs[0], labels).item()
            loss += ce_loss

            val_loss_recorder.update(loss)
            val_celoss_recorder.update(ce_loss)

            if Config.use_dcl and Config.cls_2xmul:
                outputs_pred = outputs[0] + outputs[1][:, 0:num_cls] + outputs[1][:, num_cls:2 * num_cls]
            else:
                outputs_pred = outputs[0]
            top3_val, top3_pos = torch.topk(outputs_pred, 3)

            # print('{:s} eval_batch: {:-6d} / {:d} loss: {:8.4f}'.format(val_version, batch_cnt_val, val_epoch_step, loss), flush=True)

            # top-k correctness is cumulative: top-2 counts top-1 hits as well
            batch_corrects1 = torch.sum((top3_pos[:, 0] == labels)).data.item()
            val_corrects1 += batch_corrects1
            batch_corrects2 = torch.sum((top3_pos[:, 1] == labels)).data.item()
            val_corrects2 += (batch_corrects2 + batch_corrects1)
            batch_corrects3 = torch.sum((top3_pos[:, 2] == labels)).data.item()
            val_corrects3 += (batch_corrects3 + batch_corrects2 + batch_corrects1)

        val_acc1 = val_corrects1 / item_count
        val_acc2 = val_corrects2 / item_count
        val_acc3 = val_corrects3 / item_count

        log_file.write(val_version + '\t' + str(val_loss_recorder.get_val()) +
                       '\t' + str(val_celoss_recorder.get_val()) + '\t' +
                       str(val_acc1) + '\t' + str(val_acc3) + '\n')

        t1 = time.time()
        since = t1 - t0
        print('--' * 30, flush=True)
        print(
            '% 3d %s %s %s-loss: %.4f ||%s-acc@1: %.4f %s-acc@2: %.4f %s-acc@3: %.4f loss: %8.4f ||time: %d'
            % (epoch_num, val_version, dt(), val_version,
               val_loss_recorder.get_val(init=True), val_version, val_acc1,
               val_version, val_acc2, val_version, val_acc3, loss, since),
            flush=True)
        print('--' * 30, flush=True)

    return val_acc1, val_acc2, val_acc3
def eval_turn(Config, model, data_loader, val_version, epoch_num, log_file):

    model.train(False)

    val_size = data_loader.__len__()
    item_count = data_loader.total_item_len
    t0 = time.time()
    get_l1_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()

    val_batch_size = data_loader.batch_size
    val_epoch_step = data_loader.__len__()
    num_cls = data_loader.num_cls

    val_loss_recorder = LossRecord(val_batch_size)
    val_celoss_recorder = LossRecord(val_batch_size)
    print('evaluating %s ...' % val_version, flush=True)

    scores = []
    test_labels = []
    with torch.no_grad():
        for batch_cnt_val, data_val in enumerate(data_loader):
            inputs = Variable(data_val[0].cuda())
            labels = Variable(
                torch.from_numpy(np.array(data_val[1])).long().cuda())
            preds = model(inputs)
            preds = F.softmax(preds[0], dim=1)
            preds = preds.cpu().data.numpy()
            # binary task: keep the positive-class probability per sample
            preds = preds[:, 1].squeeze()
            # move labels off the GPU before collecting them as plain values
            labels = list(labels.cpu().numpy().squeeze())
            preds = list(preds)
            scores = scores + preds
            test_labels = test_labels + labels
            # (the top-3 accuracy / loss-recording path of the original
            # eval_turn was dropped in this binary variant)

        # calculate TPR at fixed FPR operating points
        fpr_list = [0.01, 0.005, 0.001]
        threshold_list = get_thresholdtable_from_fpr(scores, test_labels, fpr_list)
        tpr_list = get_tpr_from_threshold(scores, test_labels, threshold_list)

        # show results
        print('=' * 73)
        print('TPR@FPR=10E-3: {}\n'.format(tpr_list[0]))
        print('TPR@FPR=5E-3: {}\n'.format(tpr_list[1]))
        print('TPR@FPR=10E-4: {}\n'.format(tpr_list[2]))
        print('=' * 73)

        log_file.write(val_version + '\t' + str(tpr_list[0]) + '\t' +
                       str(tpr_list[1]) + '\t' + str(tpr_list[2]) + '\n')

        t1 = time.time()
        since = t1 - t0
        print('--' * 30, flush=True)
        print(
            'TPR@FPR=10E-3: %.4f || TPR@FPR=5E-3: %.4f || TPR@FPR=10E-4: %.4f ||time: %d'
            % (tpr_list[0], tpr_list[1], tpr_list[2], since),
            flush=True)
        print('--' * 30, flush=True)

    return tpr_list[0], tpr_list[1], tpr_list[2]
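# get_thresholdtable_from_fpr and get_tpr_from_threshold are repo-local
# helpers not defined in this section. Plausible sketches, assuming scores
# are positive-class probabilities and labels are 0/1 (negatives = 0); the
# repo's real versions may differ:
def get_thresholdtable_from_fpr(scores, labels, fpr_list):
    # for each target FPR, pick the score threshold the negatives imply
    neg_scores = sorted((s for s, l in zip(scores, labels) if l == 0),
                        reverse=True)
    thresholds = []
    for fpr in fpr_list:
        idx = max(int(len(neg_scores) * fpr) - 1, 0)
        thresholds.append(neg_scores[idx] if neg_scores else 1.0)
    return thresholds


def get_tpr_from_threshold(scores, labels, threshold_list):
    # fraction of positives whose score clears each threshold
    pos_scores = np.array([s for s, l in zip(scores, labels) if l == 1])
    return [float((pos_scores >= th).mean()) if len(pos_scores) else 0.0
            for th in threshold_list]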
def train(Config,
          model,
          epoch_num,
          start_epoch,
          optimizer,
          exp_lr_scheduler,
          data_loader,
          save_dir,
          sw,
          data_size=448,
          savepoint=500,
          checkpoint=1000):
    # savepoint: save without evaluation; checkpoint: save with evaluation
    best_prec1 = 0.
    step = 0
    eval_train_flag = False
    rec_loss = []
    checkpoint_list = []

    train_batch_size = data_loader['train'].batch_size
    train_epoch_step = data_loader['train'].__len__()
    train_loss_recorder = LossRecord(train_batch_size)

    if savepoint > train_epoch_step:
        savepoint = 1 * train_epoch_step
        checkpoint = savepoint

    date_suffix = dt()
    # log_file = open(os.path.join(Config.log_folder, 'formal_log_r50_dcl_%s_%s.log' % (str(data_size), date_suffix)), 'a')

    add_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()
    get_loss1 = Loss_1()
    get_focal_loss = FocalLoss()
    get_angle_loss = AngleLoss()

    for epoch in range(start_epoch, epoch_num - 1):
        # dummy optimizer.step() so the scheduler can run first without the
        # PyTorch >= 1.1 "scheduler called before optimizer" warning
        optimizer.step()
        exp_lr_scheduler.step(epoch)
        model.train(True)
        save_grad = []
        for batch_cnt, data in enumerate(data_loader['train']):
            step += 1
            loss = 0
            model.train(True)
            if Config.use_backbone:
                inputs, labels, img_names = data
                inputs = Variable(inputs.cuda())
                labels = Variable(torch.from_numpy(np.array(labels)).cuda())
            if Config.use_dcl:
                if Config.multi:
                    inputs, labels, labels_swap, swap_law, blabels, clabels, tlabels, img_names = data
                else:
                    inputs, labels, labels_swap, swap_law, img_names = data
                inputs = Variable(inputs.cuda())
                labels = Variable(torch.from_numpy(np.array(labels)).cuda())
                labels_swap = Variable(
                    torch.from_numpy(np.array(labels_swap)).cuda())
                swap_law = Variable(
                    torch.from_numpy(np.array(swap_law)).float().cuda())
                if Config.multi:
                    blabels = Variable(
                        torch.from_numpy(np.array(blabels)).cuda())
                    clabels = Variable(
                        torch.from_numpy(np.array(clabels)).cuda())
                    tlabels = Variable(
                        torch.from_numpy(np.array(tlabels)).cuda())

            optimizer.zero_grad()

            # visualize input images
            # sw.add_image('attention_image', inputs[0])

            if inputs.size(0) < 2 * train_batch_size:
                outputs = model(inputs, inputs[0:-1:2])
            else:
                outputs = model(inputs, None)

            if Config.multi:
                if Config.use_loss1:
                    # tie the brand label to the vehicle model
                    b_loss, pro_b = get_loss1(outputs[2], blabels)
                    t_loss, _ = get_loss1(outputs[4], tlabels, brand_prob=pro_b)
                    s_loss, pro_s = get_loss1(outputs[0], labels, brand_prob=pro_b)
                    c_loss, _ = get_loss1(outputs[3], clabels)
                    ce_loss = b_loss + t_loss + s_loss + c_loss * 0.2
                else:
                    # one cross-entropy per head (the original summed
                    # outputs[0] against all four label sets, which looks
                    # like a copy-paste slip)
                    ce_loss = get_ce_loss(outputs[0], labels) + \
                        get_ce_loss(outputs[2], blabels) + \
                        get_ce_loss(outputs[3], clabels) + \
                        get_ce_loss(outputs[4], tlabels)
            else:
                if Config.use_focal_loss:
                    ce_loss = get_focal_loss(outputs[0], labels)
                else:
                    if Config.use_loss1:
                        # Loss_1 combines the two losses internally; the
                        # original discarded its result (ce_loss = 0), which
                        # also crashed the print below, so it is used here
                        ce_loss, pro_s = get_loss1(outputs[0], labels)
                    else:
                        ce_loss = get_ce_loss(outputs[0], labels)

            if Config.use_Asoftmax:
                fetch_batch = labels.size(0)
                if batch_cnt % (train_epoch_step // 5) == 0:
                    angle_loss = get_angle_loss(outputs[3],
                                                labels[0:fetch_batch:2],
                                                decay=0.9)
                else:
                    angle_loss = get_angle_loss(outputs[3],
                                                labels[0:fetch_batch:2])
                loss += angle_loss

            loss += ce_loss

            alpha_ = 1
            beta_ = 1
            # gamma_ = 0.01 if Config.dataset == 'STCAR' or Config.dataset == 'AIR' else 1
            gamma_ = 0.01
            if Config.use_dcl:
                if Config.use_focal_loss:
                    swap_loss = get_focal_loss(outputs[1], labels_swap) * beta_
                else:
                    if Config.use_loss1:
                        swap_loss, _ = get_loss1(outputs[1], labels_swap, brand_prob=pro_s)
                    else:
                        swap_loss = get_ce_loss(outputs[1], labels_swap) * beta_
                loss += swap_loss
                if not Config.no_loc:
                    law_loss = add_loss(outputs[2], swap_law) * gamma_
                    loss += law_loss

            loss.backward()
            torch.cuda.synchronize()
            # the original omitted the per-batch optimizer.step(); without it
            # the weights never update, so it is restored here
            optimizer.step()
            torch.cuda.synchronize()

            if Config.use_dcl:
                if Config.multi:
                    print(
                        'step: {:-8d} / {:d} loss: {:6.4f} ce_loss: {:6.4f} swap_loss: {:6.4f} '
                        .format(step, train_epoch_step, loss.detach().item(),
                                ce_loss.detach().item(),
                                swap_loss.detach().item()),
                        flush=True)
                elif Config.no_loc:
                    # no law_loss on this path
                    print(
                        'step: {:-8d} / {:d} loss=ce_loss+swap_loss: {:6.4f} = {:6.4f} + {:6.4f} '
                        .format(step, train_epoch_step, loss.detach().item(),
                                ce_loss.detach().item(),
                                swap_loss.detach().item()),
                        flush=True)
                else:
                    print(
                        'step: {:-8d} / {:d} loss=ce_loss+swap_loss+law_loss: {:6.4f} = {:6.4f} + {:6.4f} + {:6.4f} '
                        .format(step, train_epoch_step, loss.detach().item(),
                                ce_loss.detach().item(),
                                swap_loss.detach().item(),
                                law_loss.detach().item()),
                        flush=True)
            if Config.use_backbone:
                print(
                    'step: {:-8d} / {:d} loss=ce_loss: {:6.4f} = {:6.4f} '
                    .format(step, train_epoch_step, loss.detach().item(),
                            ce_loss.detach().item()),
                    flush=True)
            rec_loss.append(loss.detach().item())
            train_loss_recorder.update(loss.detach().item())

            # evaluation & save
            if step % checkpoint == 0:
                rec_loss = []
                print(32 * '-', flush=True)
                print(
                    'step: {:d} / {:d} global_step: {:8.2f} train_epoch: {:04d} rec_train_loss: {:6.4f}'
                    .format(step, train_epoch_step,
                            1.0 * step / train_epoch_step, epoch,
                            train_loss_recorder.get_val()),
                    flush=True)
                print('current lr:%s' % exp_lr_scheduler.get_lr(), flush=True)

                if Config.multi:
                    val_acc_s, val_acc_b, val_acc_c, val_acc_t = eval_turn(
                        Config, model, data_loader['val'], 'val', epoch)
                    is_best = val_acc_s > best_prec1
                    best_prec1 = max(val_acc_s, best_prec1)
                    filename = 'weights_%d_%d_%.4f_%.4f.pth' % (
                        epoch, batch_cnt, val_acc_s, val_acc_b)
                    save_checkpoint(model.state_dict(), is_best, save_dir, filename)
                    sw.add_scalar("Train_Loss/Total_loss", loss.detach().item(), epoch)
                    if Config.use_loss1:
                        # the per-branch losses only exist on the loss1 path
                        sw.add_scalar("Train_Loss/b_loss", b_loss.detach().item(), epoch)
                        sw.add_scalar("Train_Loss/t_loss", t_loss.detach().item(), epoch)
                        sw.add_scalar("Train_Loss/s_loss", s_loss.detach().item(), epoch)
                        sw.add_scalar("Train_Loss/c_loss", c_loss.detach().item(), epoch)
                    sw.add_scalar("Accuracy/val_acc_s", val_acc_s, epoch)
                    sw.add_scalar("Accuracy/val_acc_b", val_acc_b, epoch)
                    sw.add_scalar("Accuracy/val_acc_c", val_acc_c, epoch)
                    sw.add_scalar("Accuracy/val_acc_t", val_acc_t, epoch)
                    sw.add_scalar("learning_rate", exp_lr_scheduler.get_lr()[1], epoch)
                else:
                    val_acc1, val_acc2, val_acc3 = eval_turn(
                        Config, model, data_loader['val'], 'val', epoch)
                    is_best = val_acc1 > best_prec1
                    best_prec1 = max(val_acc1, best_prec1)
                    filename = 'weights_%d_%d_%.4f_%.4f.pth' % (
                        epoch, batch_cnt, val_acc1, val_acc3)
                    save_checkpoint(model.state_dict(), is_best, save_dir, filename)
                    sw.add_scalar("Train_Loss", loss.detach().item(), epoch)
                    sw.add_scalar("Val_Accuracy", val_acc1, epoch)
                    sw.add_scalar("learning_rate", exp_lr_scheduler.get_lr()[1], epoch)
                torch.cuda.empty_cache()

            # save only
            elif step % savepoint == 0:
                train_loss_recorder.update(rec_loss)
                rec_loss = []
                save_path = os.path.join(
                    save_dir, 'savepoint_weights-%d-%s.pth' % (step, dt()))

                checkpoint_list.append(save_path)
                if len(checkpoint_list) == 6:
                    os.remove(checkpoint_list[0])
                    del checkpoint_list[0]
                torch.save(model.state_dict(), save_path)
                torch.cuda.empty_cache()
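# save_checkpoint is called above but not defined in this section. A common
# implementation, sketched under the assumption that it follows the usual
# "save latest, copy to a best-model file" pattern; the repo's version may
# differ:
import shutil


def save_checkpoint(state, is_best, save_dir, filename):
    save_path = os.path.join(save_dir, filename)
    torch.save(state, save_path)
    if is_best:
        # keep a stable alias for the best weights seen so far
        shutil.copyfile(save_path, os.path.join(save_dir, 'model_best.pth'))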
def eval_turn(Config, model, data_loader, val_version, epoch_num, log_file):

    model.train(False)

    val_size = data_loader.__len__()
    item_count = data_loader.total_item_len
    t0 = time.time()
    get_l1_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()
    get_ce_sig_loss = nn.BCELoss()

    val_batch_size = data_loader.batch_size
    val_epoch_step = data_loader.__len__()
    num_cls = data_loader.num_cls

    val_loss_recorder = LossRecord(val_batch_size)
    val_celoss_recorder = LossRecord(val_batch_size)
    print('evaluating %s ...' % val_version, flush=True)

    eval_t = {}  # metrics per threshold (the original abused locals() for this)
    sum_fbeta = 0
    y_pred, Y_test = [], []
    test_file = open("./result_log/val.log", "a+")
    with torch.no_grad():
        for batch_cnt_val, data_val in enumerate(data_loader):
            inputs, labels, labels_swap, swap_law, law_index, img_names = data_val
            labels_tensor_ = Variable(
                torch.FloatTensor(np.array(labels)).cuda())
            # even indices are the unswapped images (assumes batch size 10)
            idx_unswap = torch.tensor([0, 2, 4, 6, 8], dtype=torch.long).cuda()
            labels_tensor = torch.index_select(labels_tensor_, dim=0, index=idx_unswap)
            labels_npy = np.array(labels_tensor.cpu())
            labels_ = labels_npy.astype(np.uint8)
            inputs = Variable(inputs.cuda())
            outputs = model(inputs, law_index)

            loss = 0
            # ce_loss = get_ce_loss(outputs[0], labels).item()
            ce_loss = get_ce_sig_loss(outputs[0], labels_tensor).item()
            loss += ce_loss

            val_loss_recorder.update(loss)
            val_celoss_recorder.update(ce_loss)

            if Config.use_dcl and Config.cls_2xmul:
                outputs_pred = outputs[0] + outputs[1][:, 0:num_cls] + outputs[1][:, num_cls:2 * num_cls]
            else:
                outputs_pred = outputs[0]

            # MAP here is label ranking, so no normalization is needed
            predict_mul_ = outputs_pred.cpu().numpy()
            temp_fbeta = label_ranking_average_precision_score(labels_, predict_mul_)
            # threshold at 0.5: scores >= 0.5 become 1, the rest 0
            predict_multensor = torch.ge(outputs_pred, 0.5)
            predict_mul = predict_multensor.cpu().numpy()
            sum_fbeta = sum_fbeta + temp_fbeta
            ave_num = batch_cnt_val + 1
            y_pred.extend(predict_mul[:])
            Y_test.extend(labels_[:])

        ave_acc = sum_fbeta / ave_num
        y_pred_ = np.array(y_pred)
        Y_test_ = np.array(Y_test)

        log_file.write(val_version + '\t' + str(val_loss_recorder.get_val()) +
                       '\t' + str(val_celoss_recorder.get_val()) + '\t' +
                       str(ave_acc) + '\n')

        t1 = time.time()
        since = t1 - t0
        print('--' * 30, flush=True)
        print('% 3d %s %s %s-loss: %.4f ||%s-ave@acc: %.4f ||time: %d' %
              (epoch_num, val_version, dt(), val_version,
               val_loss_recorder.get_val(init=True), val_version, ave_acc,
               since),
              flush=True)
        print('--' * 30, flush=True)

        eval_t['metrics_' + str(0.5)] = evaluate_test(predictions=y_pred_, labels=Y_test_)
        metrics = eval_t['metrics_' + str(0.5)]
        output = "=> Test : epoch = {}\n".format(epoch_num)
        output += "=> Test : Coverage = {}\n Average Precision = {}\n Micro Precision = {}\n Micro Recall = {}\n Micro F Score = {}\n".format(
            metrics['coverage'], ave_acc, metrics['micro_precision'],
            metrics['micro_recall'], metrics['micro_f1'])
        output += "=> Test : Macro Precision = {}\n Macro Recall = {}\n Macro F Score = {}\n ranking_loss = {}\n hamming_loss = {}\n\n".format(
            metrics['macro_precision'], metrics['macro_recall'],
            metrics['macro_f1'], metrics['ranking_loss'],
            metrics['hamming_loss'])
        # output += "\n=> Test : ma-False_positive_rate(FPR) = {}, mi-False_positive_rate(FPR) = {}\n".format(metrics['ma-FPR'], metrics['mi-FPR'])
        print(output)
        test_file.write(output)
    test_file.close()
    return ave_acc
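# evaluate_test is referenced above but not defined in this section. A
# sketch of the metric dictionary it appears to return, built from sklearn
# and assuming binary indicator arrays of shape [n_samples, n_labels]; the
# repo's real implementation may differ:
from sklearn import metrics as skm


def evaluate_test(predictions, labels):
    return {
        'coverage': skm.coverage_error(labels, predictions),
        'ranking_loss': skm.label_ranking_loss(labels, predictions),
        'hamming_loss': skm.hamming_loss(labels, predictions),
        'micro_precision': skm.precision_score(labels, predictions, average='micro', zero_division=0),
        'micro_recall': skm.recall_score(labels, predictions, average='micro', zero_division=0),
        'micro_f1': skm.f1_score(labels, predictions, average='micro', zero_division=0),
        'macro_precision': skm.precision_score(labels, predictions, average='macro', zero_division=0),
        'macro_recall': skm.recall_score(labels, predictions, average='macro', zero_division=0),
        'macro_f1': skm.f1_score(labels, predictions, average='macro', zero_division=0),
    }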
def train(Config,
          model,
          epoch_num,
          start_epoch,
          optimizer,
          exp_lr_scheduler,
          data_loader,
          save_dir,
          data_ver='all',
          data_size=448,
          savepoint=500,
          checkpoint=1000):
    step = 0
    eval_train_flag = False
    rec_loss = []
    checkpoint_list = []

    train_batch_size = data_loader['train'].batch_size
    train_epoch_step = data_loader['train'].__len__()
    train_loss_recorder = LossRecord(train_batch_size)

    bitempered_layer = BiTemperedLayer(t1=0.9, t2=1.05)
    bitempered_loss = BiTemperedLoss()

    add_loss = nn.L1Loss()
    get_focal_loss = FocalLoss()
    get_angle_loss = AngleLoss()
    get_ce_loss = nn.CrossEntropyLoss()
    get_l2_loss = nn.MSELoss()

    for epoch in range(start_epoch, epoch_num - 1):
        exp_lr_scheduler.step(epoch)
        model.train(True)
        save_grad = []
        for batch_cnt, data in enumerate(data_loader['train']):
            step += 1
            loss = 0
            model.train(True)

            inputs, labels, img_names = data
            inputs = inputs.cuda()
            labels = torch.from_numpy(np.array(labels)).cuda()

            optimizer.zero_grad()
            outputs = model(inputs)

            # ce_loss = get_ce_loss(outputs, labels)
            # one-hot targets over the 50030 classes; use the non-in-place
            # unsqueeze (the original unsqueeze_() mutated labels as a side effect)
            labels_onehot = torch.zeros(outputs.shape[0], 50030).cuda().scatter_(
                1, labels.unsqueeze(1), 1)
            ce_loss = acc_loss(labels_onehot, F.softmax(outputs, 1))
            loss += ce_loss

            loss.backward()
            # torch.cuda.synchronize()
            optimizer.step()
            # torch.cuda.synchronize()
    # (the original ended with log_file.close(), but no log file is ever
    # opened in this variant, so that call is dropped)
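# acc_loss above is a repo-local helper that this section never defines.
# Judging from its call site -- acc_loss(one_hot_targets, softmax_probs) --
# it behaves like a soft-label cross entropy; a minimal sketch under that
# assumption (the repo's real version may differ):
def acc_loss(targets_onehot, probs, eps=1e-7):
    # mean negative log-likelihood of the true class
    return -(targets_onehot * torch.log(probs + eps)).sum(dim=1).mean()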
def eval_turn(Config, model, data_loader, val_version, epoch_num):

    model.train(False)

    val_corrects1 = 0
    val_corrects2 = 0
    val_corrects3 = 0
    val_corrects_s = 0
    val_corrects_b = 0
    val_corrects_c = 0
    val_corrects_t = 0
    val_size = data_loader.__len__()
    item_count = data_loader.total_item_len
    t0 = time.time()
    get_l1_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()

    val_batch_size = data_loader.batch_size
    val_epoch_step = data_loader.__len__()
    num_cls = data_loader.num_cls

    val_loss_recorder = LossRecord(val_batch_size)
    val_celoss_recorder = LossRecord(val_batch_size)
    print('evaluating %s ...' % val_version, flush=True)
    with torch.no_grad():
        for batch_cnt_val, data_val in enumerate(data_loader):
            inputs = Variable(data_val[0].cuda())
            labels = Variable(
                torch.from_numpy(np.array(data_val[1])).long().cuda())
            outputs = model(inputs)
            loss = 0

            ce_loss = get_ce_loss(outputs[0], labels).item()
            loss += ce_loss

            val_loss_recorder.update(loss)
            val_celoss_recorder.update(ce_loss)

            if Config.multi:
                if Config.no_loc:
                    blabels = Variable(
                        torch.from_numpy(np.array(data_val[2])).long().cuda())
                    clabels = Variable(
                        torch.from_numpy(np.array(data_val[3])).long().cuda())
                    tlabels = Variable(
                        torch.from_numpy(np.array(data_val[4])).long().cuda())
                    s_pred = outputs[0]
                    b_pred = outputs[2]
                    c_pred = outputs[3]
                    t_pred = outputs[4]
                    s_pred_confidence, s_pred_predicted = torch.max(s_pred, 1)
                    b_pred_confidence, b_pred_predicted = torch.max(b_pred, 1)
                    c_pred_confidence, c_pred_predicted = torch.max(c_pred, 1)
                    t_pred_confidence, t_pred_predicted = torch.max(t_pred, 1)
                    print('{:s} eval_batch: {:-6d} / {:d} loss: {:8.4f}'.format(
                        val_version, batch_cnt_val, val_epoch_step, loss),
                        flush=True)
                    batch_corrects_s = torch.sum((s_pred_predicted == labels)).data.item()
                    batch_corrects_b = torch.sum((b_pred_predicted == blabels)).data.item()
                    batch_corrects_c = torch.sum((c_pred_predicted == clabels)).data.item()
                    batch_corrects_t = torch.sum((t_pred_predicted == tlabels)).data.item()
                    val_corrects_s += batch_corrects_s
                    val_corrects_b += batch_corrects_b
                    val_corrects_c += batch_corrects_c
                    val_corrects_t += batch_corrects_t
            else:
                # outputs_pred = outputs[0] + outputs[1][:, 0:num_cls] + outputs[1][:, num_cls:2 * num_cls]
                outputs_pred = outputs[0]
                top3_val, top3_pos = torch.topk(outputs_pred, 3)
                print('{:s} eval_batch: {:-6d} / {:d} loss: {:8.4f}'.format(
                    val_version, batch_cnt_val, val_epoch_step, loss),
                    flush=True)
                # top-k correctness is cumulative: top-2 counts top-1 hits as well
                batch_corrects1 = torch.sum((top3_pos[:, 0] == labels)).data.item()
                val_corrects1 += batch_corrects1
                batch_corrects2 = torch.sum((top3_pos[:, 1] == labels)).data.item()
                val_corrects2 += (batch_corrects2 + batch_corrects1)
                batch_corrects3 = torch.sum((top3_pos[:, 2] == labels)).data.item()
                val_corrects3 += (batch_corrects3 + batch_corrects2 + batch_corrects1)

    if Config.multi:
        if Config.no_loc:
            val_acc_s = val_corrects_s / item_count
            val_acc_b = val_corrects_b / item_count
            val_acc_c = val_corrects_c / item_count
            val_acc_t = val_corrects_t / item_count
            t1 = time.time()
            since = t1 - t0
            print('--' * 30, flush=True)
            # report all four heads (the original's label/value pairing was
            # inconsistent here: it labeled C/B but printed c/t values)
            print(
                '% 3d %s %s %s-loss: %.4f ||%s-acc@S: %.4f %s-acc@B: %.4f %s-acc@C: %.4f %s-acc@T: %.4f ||time: %d'
                % (epoch_num, val_version, dt(), val_version,
                   val_loss_recorder.get_val(init=True), val_version,
                   val_acc_s, val_version, val_acc_b, val_version, val_acc_c,
                   val_version, val_acc_t, since),
                flush=True)
            print('--' * 30, flush=True)
            return val_acc_s, val_acc_b, val_acc_c, val_acc_t
    else:
        val_acc1 = val_corrects1 / item_count
        val_acc2 = val_corrects2 / item_count
        val_acc3 = val_corrects3 / item_count
        # log_file.write(val_version + '\t' + str(val_loss_recorder.get_val()) + '\t' + str(val_celoss_recorder.get_val()) + '\t' + str(val_acc1) + '\t' + str(val_acc3) + '\n')
        t1 = time.time()
        since = t1 - t0
        print('--' * 30, flush=True)
        print(
            '% 3d %s %s %s-loss: %.4f ||%s-acc@1: %.4f %s-acc@2: %.4f %s-acc@3: %.4f ||time: %d'
            % (epoch_num, val_version, dt(), val_version,
               val_loss_recorder.get_val(init=True), val_version, val_acc1,
               val_version, val_acc2, val_version, val_acc3, since),
            flush=True)
        print('--' * 30, flush=True)
        return val_acc1, val_acc2, val_acc3
def eval_turn(Config, model, data_loader, val_version, epoch_num, log_file, efd=None):

    model.train(False)

    val_corrects1 = 0
    val_corrects2 = 0
    val_corrects3 = 0
    bmy_correct = 0
    bm_correct = 0
    bb_correct = 0  # brand accuracy derived from the bmy (brand-model-year) head
    val_size = data_loader.__len__()
    item_count = data_loader.total_item_len
    t0 = time.time()
    get_l1_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()

    val_batch_size = data_loader.batch_size
    val_epoch_step = data_loader.__len__()
    num_cls = data_loader.num_cls

    val_loss_recorder = LossRecord(val_batch_size)
    val_celoss_recorder = LossRecord(val_batch_size)
    print('evaluating %s ...' % val_version, flush=True)
    # the real mapping comes from WxsDsm; it is disabled in this snapshot, so
    # fall back to an empty dict (the model-accuracy loop then skips entries)
    # bmy_id_bm_vo_dict = WxsDsm.get_bmy_id_bm_vo_dict()
    # bmy_sim_org_dict = WxsDsm.get_bmy_sim_org_dict()
    bmy_id_bm_vo_dict = {}
    with torch.no_grad():
        for batch_cnt_val, data_val in enumerate(data_loader):
            inputs = Variable(data_val[0].cuda())
            print('eval_model.eval_turn inputs: {0};'.format(inputs.shape))
            brand_labels = Variable(
                torch.from_numpy(np.array(data_val[1])).long().cuda())
            bmy_labels = Variable(
                torch.from_numpy(np.array(data_val[-1])).long().cuda())
            img_files = data_val[-2]
            outputs = model(inputs)
            loss = 0

            ce_loss = get_ce_loss(outputs[0], brand_labels).item()
            loss += ce_loss

            val_loss_recorder.update(loss)
            val_celoss_recorder.update(ce_loss)

            if Config.use_dcl and Config.cls_2xmul:
                outputs_pred = outputs[0] + outputs[1][:, 0:num_cls] + outputs[1][:, num_cls:2 * num_cls]
            else:
                outputs_pred = outputs[0]
            top3_val, top3_pos = torch.topk(outputs_pred, 3)

            print('{:s} eval_batch: {:-6d} / {:d} loss: {:8.4f}'.format(
                val_version, batch_cnt_val, val_epoch_step, loss),
                flush=True)

            batch_corrects1 = torch.sum((top3_pos[:, 0] == brand_labels)).data.item()
            val_corrects1 += batch_corrects1
            batch_corrects2 = torch.sum((top3_pos[:, 1] == brand_labels)).data.item()
            val_corrects2 += (batch_corrects2 + batch_corrects1)
            batch_corrects3 = torch.sum((top3_pos[:, 2] == brand_labels)).data.item()
            val_corrects3 += (batch_corrects3 + batch_corrects2 + batch_corrects1)

            # year-model (bmy) accuracy
            outputs_bmy = outputs[-1]
            bmy_top5_val, bmy_top5_pos = torch.topk(outputs_bmy, 5)
            batch_bmy_correct = torch.sum((bmy_top5_pos[:, 0] == bmy_labels)).data.item()
            bmy_correct += batch_bmy_correct

            # vehicle-model accuracy (the original swapped the gt/pred names:
            # top-1 of the bmy head is the prediction, bmy_labels the ground truth)
            batch_bm_correct = 0
            for im in range(bmy_top5_pos.shape[0]):
                pred_bmy_id = bmy_top5_pos[im][0].item()
                gt_bmy_id = bmy_labels[im].item()
                if pred_bmy_id in bmy_id_bm_vo_dict:
                    pred_bm_vo = bmy_id_bm_vo_dict[pred_bmy_id]
                    gt_bm_vo = bmy_id_bm_vo_dict[gt_bmy_id]
                    if pred_bm_vo['model_id'] == gt_bm_vo['model_id']:
                        batch_bm_correct += 1
            bm_correct += batch_bm_correct

            # write brand-misclassified samples to the top1_error_samples file
            if efd is not None:
                for idx in range(top3_pos.shape[0]):
                    if top3_pos[idx][0] != brand_labels[idx]:
                        efd.write('{0}*{1}*{2}\n'.format(
                            img_files[idx], brand_labels[idx], top3_pos[idx][0]))
            '''
            # brand accuracy via the fgvc id -> brand string mapping (disabled)
            pred_size = top3_pos[:, 0].shape[0]
            batch_bb_correct = 0
            for idx in range(pred_size):
                pred_bmy = fgvc_id_brand_dict[int(top3_pos[idx][0])]
                pred_brand = pred_bmy.split('_')[0]
                gt_bmy = fgvc_id_brand_dict[int(labels[idx])]
                gt_brand = gt_bmy.split('_')[0]
                if pred_brand == gt_brand:
                    batch_bb_correct += 1
            bb_correct += batch_bb_correct
            '''

        val_acc1 = val_corrects1 / item_count
        val_acc2 = val_corrects2 / item_count
        val_acc3 = val_corrects3 / item_count
        bmy_acc = bmy_correct / item_count
        bm_acc = bm_correct / item_count
        bb_acc = bb_correct / item_count  # stays 0 while the block above is disabled

        log_file.write(val_version + '\t' + str(val_loss_recorder.get_val()) +
                       '\t' + str(val_celoss_recorder.get_val()) + '\t' +
                       str(val_acc1) + '\t' + str(val_acc3) + '\n')

        t1 = time.time()
        since = t1 - t0
        print('--' * 30, flush=True)
        print(
            '% 3d %s %s %s-loss: %.4f || brand: %s-acc@1: %.4f %s-acc@2: %.4f %s-acc@3: %.4f; model: %.4f; year: %.4f; ||time: %d'
            % (epoch_num, val_version, dt(), val_version,
               val_loss_recorder.get_val(init=True), val_version, val_acc1,
               val_version, val_acc2, val_version, val_acc3, bm_acc, bmy_acc,
               since),
            flush=True)
        print('--' * 30, flush=True)

    return val_acc1, val_acc2, val_acc3
def train(Config,
          model,
          epoch_num,
          start_epoch,
          optimizer,
          exp_lr_scheduler,
          data_loader,
          save_dir,
          data_size=448,
          savepoint=500,
          checkpoint=1000):
    # savepoint: save without evaluation; checkpoint: save with evaluation
    bmy_weight = 1.0  # 1.5  # weight of the brand-model-year branch during training
    step = 0
    eval_train_flag = False
    rec_loss = []
    checkpoint_list = []
    steps = np.array([], dtype=np.int64)  # np.int is removed in recent NumPy
    train_accs = np.array([], dtype=np.float32)
    test_accs = np.array([], dtype=np.float32)
    ce_losses = np.array([], dtype=np.float32)
    ce_loss_mu = -1
    ce_loss_std = 0.0

    train_batch_size = data_loader['train'].batch_size
    train_epoch_step = data_loader['train'].__len__()
    train_loss_recorder = LossRecord(train_batch_size)

    if savepoint > train_epoch_step:
        savepoint = 1 * train_epoch_step
        checkpoint = savepoint

    date_suffix = dt()
    log_file = open(
        os.path.join(
            Config.log_folder,
            'formal_log_r50_dcl_%s_%s.log' % (str(data_size), date_suffix)),
        'a')

    add_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()
    get_focal_loss = FocalLoss()
    get_angle_loss = AngleLoss()

    for epoch in range(start_epoch, epoch_num - 1):
        model.train(True)
        save_grad = []
        for batch_cnt, data in enumerate(data_loader['train']):
            step += 1
            loss = 0
            model.train(True)
            if Config.use_backbone:
                inputs, brand_labels, img_names, bmy_labels = data
                inputs = Variable(inputs.cuda())
                brand_labels = Variable(
                    torch.from_numpy(np.array(brand_labels)).cuda())
                bmy_labels = Variable(
                    torch.from_numpy(np.array(bmy_labels)).cuda())
            if Config.use_dcl:
                inputs, brand_labels, brand_labels_swap, swap_law, img_names, bmy_labels = data
                org_brand_labels = brand_labels
                inputs = Variable(inputs.cuda())
                brand_labels = Variable(
                    torch.from_numpy(np.array(brand_labels)).cuda())
                bmy_labels = Variable(
                    torch.from_numpy(np.array(bmy_labels)).cuda())
                brand_labels_swap = Variable(
                    torch.from_numpy(np.array(brand_labels_swap)).cuda())
                swap_law = Variable(
                    torch.from_numpy(np.array(swap_law)).float().cuda())

            optimizer.zero_grad()

            if inputs.size(0) < 2 * train_batch_size:
                outputs = model(inputs, inputs[0:-1:2])
            else:
                outputs = model(inputs, None)

            if Config.use_focal_loss:
                ce_loss_brand = get_focal_loss(outputs[0], brand_labels)
                ce_loss_bmy = get_focal_loss(outputs[-1], bmy_labels)
            else:
                ce_loss_brand = get_ce_loss(outputs[0], brand_labels)
                ce_loss_bmy = get_ce_loss(outputs[-1], bmy_labels)
            ce_loss = ce_loss_brand + bmy_weight * ce_loss_bmy

            if Config.use_Asoftmax:
                fetch_batch = brand_labels.size(0)
                if batch_cnt % (train_epoch_step // 5) == 0:
                    angle_loss = get_angle_loss(outputs[3],
                                                brand_labels[0:fetch_batch:2],
                                                decay=0.9)
                else:
                    angle_loss = get_angle_loss(outputs[3],
                                                brand_labels[0:fetch_batch:2])
                loss += angle_loss

            loss += ce_loss
            ce_loss_val = ce_loss.detach().item()
            ce_losses = np.append(ce_losses, ce_loss_val)

            alpha_ = 1
            beta_ = 1
            gamma_ = 0.01 if Config.dataset == 'STCAR' or Config.dataset == 'AIR' else 1
            if Config.use_dcl:
                swap_loss = get_ce_loss(outputs[1], brand_labels_swap) * beta_
                loss += swap_loss
                law_loss = add_loss(outputs[2], swap_law) * gamma_
                loss += law_loss

            loss.backward()
            torch.cuda.synchronize()
            optimizer.step()
            exp_lr_scheduler.step(epoch)
            torch.cuda.synchronize()

            if Config.use_dcl:
                if ce_loss_mu > 0 and ce_loss_val > ce_loss_mu + 3.0 * ce_loss_std:
                    # log this batch: it may contain mislabeled samples
                    # (the threshold in the message matches the 3-sigma test above;
                    # the original printed 2 sigma while testing 3 sigma)
                    print('suspicious batch: loss={0}; threshold={1};'.format(
                        ce_loss_val, ce_loss_mu + 3.0 * ce_loss_std))
                    with open(
                            './logs/abnormal_samples_{0}_{1}_{2}.txt'.format(
                                epoch, step, ce_loss_val), 'a+') as fd:
                        error_batch_len = len(img_names)
                        for i in range(error_batch_len):
                            # labels come in swap pairs, hence the stride of 2
                            fd.write('{0} <=> {1};\r\n'.format(
                                org_brand_labels[i * 2], img_names[i]))
                print('epoch{}: step: {:-8d} / {:d} loss=ce_loss+'
                      'swap_loss+law_loss: {:6.4f} = {:6.4f} '
                      '+ {:6.4f} + {:6.4f} brand_loss: {:6.4f}'.format(
                          epoch, step % train_epoch_step, train_epoch_step,
                          loss.detach().item(), ce_loss_val,
                          swap_loss.detach().item(),
                          law_loss.detach().item(),
                          ce_loss_brand.detach().item()),
                      flush=True)
            if Config.use_backbone:
                print('epoch{}: step: {:-8d} / {:d} loss=ce_loss: {:6.4f} = {:6.4f} '.format(
                          epoch, step % train_epoch_step, train_epoch_step,
                          loss.detach().item(), ce_loss.detach().item()),
                      flush=True)
            rec_loss.append(loss.detach().item())
            train_loss_recorder.update(loss.detach().item())

            # evaluation & save
            if step % checkpoint == 0:
                rec_loss = []
                print(32 * '-', flush=True)
                print(
                    'step: {:d} / {:d} global_step: {:8.2f} train_epoch: {:04d} rec_train_loss: {:6.4f}'
                    .format(step, train_epoch_step,
                            1.0 * step / train_epoch_step, epoch,
                            train_loss_recorder.get_val()),
                    flush=True)
                print('current lr:%s' % exp_lr_scheduler.get_lr(), flush=True)
                '''
                if eval_train_flag:
                    trainval_acc1, trainval_acc2, trainval_acc3 = eval_turn(Config, model, data_loader['trainval'], 'trainval', epoch, log_file)
                    if abs(trainval_acc1 - trainval_acc3) < 0.01:
                        eval_train_flag = False
                '''
                print('##### validate dataset #####')
                trainval_acc1, trainval_acc2, trainval_acc3 = eval_turn(
                    Config, model, data_loader['val'], 'val', epoch, log_file)
                print('##### test dataset #####')
                # test evaluation is disabled; reuse the validation numbers
                val_acc1, val_acc2, val_acc3 = trainval_acc1, trainval_acc2, trainval_acc3
                # val_acc1, val_acc2, val_acc3 = eval_turn(Config, model, data_loader['val'], 'val', epoch, log_file)
                steps = np.append(steps, step)
                train_accs = np.append(train_accs, trainval_acc1)
                test_accs = np.append(test_accs, val_acc1)

                save_path = os.path.join(
                    save_dir, 'weights_%d_%d_%.4f_%.4f.pth' %
                    (epoch, batch_cnt, val_acc1, val_acc3))
                torch.cuda.synchronize()
                torch.save(model.state_dict(), save_path,
                           _use_new_zipfile_serialization=False)
                print('saved model to %s' % (save_path), flush=True)
                torch.cuda.empty_cache()
                # save the loss statistics and reset
                ce_loss_mu = ce_losses.mean()
                ce_loss_std = ce_losses.std()
                print('Cross entropy loss: mu={0}; std={1}; range:{2}~{3};'.format(
                    ce_loss_mu, ce_loss_std, ce_loss_mu - 3.0 * ce_loss_std,
                    ce_loss_mu + 3.0 * ce_loss_std))
                ce_losses = np.array([], dtype=np.float32)
                if train_accs.shape[0] > 30:
                    np.savetxt('./logs/steps1.txt', (steps, ))
                    np.savetxt('./logs/train_accs1.txt', (train_accs, ))
                    np.savetxt('./logs/test_accs1.txt', (test_accs, ))
                    steps = np.array([], dtype=np.int64)
                    train_accs = np.array([], dtype=np.float32)
                    test_accs = np.array([], dtype=np.float32)

            # save only
            elif step % savepoint == 0:
                train_loss_recorder.update(rec_loss)
                rec_loss = []
                save_path = os.path.join(
                    save_dir, 'savepoint_weights-%d-%s.pth' % (step, dt()))

                checkpoint_list.append(save_path)
                if len(checkpoint_list) == 6:
                    os.remove(checkpoint_list[0])
                    del checkpoint_list[0]
                torch.save(model.state_dict(), save_path,
                           _use_new_zipfile_serialization=False)
                torch.cuda.empty_cache()

    log_file.close()
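# FocalLoss is imported from elsewhere in the repo; this section never shows
# its definition. A standard single-label focal loss (Lin et al., 2017),
# sketched with an assumed default gamma=2 -- the repo's parameters and
# weighting may differ:
class FocalLossSketch(nn.Module):
    def __init__(self, gamma=2.0):
        super().__init__()
        self.gamma = gamma

    def forward(self, logits, targets):
        # log-probability of the target class for each sample
        logpt = F.log_softmax(logits, dim=1).gather(
            1, targets.unsqueeze(1)).squeeze(1)
        pt = logpt.exp()
        # down-weight well-classified examples by (1 - pt)^gamma
        return -((1.0 - pt) ** self.gamma * logpt).mean()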
def eval_turn(Config, model, data_loader, val_version, epoch_num, log_file):

    model.train(False)

    val_size = data_loader.__len__()
    item_count = data_loader.total_item_len
    t0 = time.time()
    get_l1_loss = nn.L1Loss()
    get_ce_loss = nn.CrossEntropyLoss()
    # the loop below uses get_sigmoid_ce, which the original never defined;
    # it is the multi-label BCE over sigmoid outputs, as in the other variants
    get_sigmoid_ce = nn.BCELoss()

    val_batch_size = data_loader.batch_size
    val_epoch_step = data_loader.__len__()
    num_cls = data_loader.num_cls

    val_loss_recorder = LossRecord(val_batch_size)
    val_celoss_recorder = LossRecord(val_batch_size)
    print('evaluating %s ...' % val_version, flush=True)
    sum_fbeta = 0
    with torch.no_grad():
        for batch_cnt_val, data_val in enumerate(data_loader):
            inputs, labels, labels_swap, swap_law, img_names = data_val
            labels_npy = np.array(labels)
            labels_tensor = Variable(torch.FloatTensor(labels_npy).cuda())
            labels_ = labels_npy.astype(np.uint8)
            inputs = Variable(inputs.cuda())
            outputs = model(inputs)
            loss = 0

            # ce_loss = get_ce_loss(outputs[0], labels).item()
            ce_loss = get_sigmoid_ce(outputs[0], labels_tensor).item()
            loss += ce_loss

            val_loss_recorder.update(loss)
            val_celoss_recorder.update(ce_loss)

            if Config.use_dcl and Config.cls_2xmul:
                outputs_pred = outputs[0] + outputs[1][:, 0:num_cls] + outputs[1][:, num_cls:2 * num_cls]
            else:
                outputs_pred = outputs[0]

            # MAP here is label ranking, so no normalization is needed
            # predict_multensor = torch.ge(outputs_pred, 0.5)  # threshold at 0.5
            predict_mul = outputs_pred.cpu().numpy()
            temp_fbeta = label_ranking_average_precision_score(labels_, predict_mul)

            # alternative metric (micro precision) kept from a prior revision:
            # cor_sum = 0
            # num_sum = 0
            # for j in range(10):
            #     query_col = labels_[j, :]
            #     label_col = predict_mul[j, :]
            #     index = np.where(label_col > 0.5)
            #     index_ = index[0]
            #     number_ = index_.size
            #     query_binary = query_col[index]
            #     query_label = label_col[index]
            #     batch_corrects1 = np.count_nonzero(query_binary == query_label)
            #     cor_sum = cor_sum + batch_corrects1
            #     num_sum = num_sum + number_
            # temp_fbeta = cor_sum / num_sum

            sum_fbeta = sum_fbeta + temp_fbeta
            ave_num = batch_cnt_val + 1
            # (the single-label top-3 accuracy path of the original eval_turn
            # was dropped in this multi-label variant)

        ave_acc = sum_fbeta / ave_num

        log_file.write(val_version + '\t' + str(val_loss_recorder.get_val()) +
                       '\t' + str(val_celoss_recorder.get_val()) + '\t' +
                       str(ave_acc) + '\n')

        t1 = time.time()
        since = t1 - t0
        print('--' * 30, flush=True)
        print('% 3d %s %s %s-loss: %.4f ||%s-ave@acc: %.4f ||time: %d' %
              (epoch_num, val_version, dt(), val_version,
               val_loss_recorder.get_val(init=True), val_version, ave_acc,
               since),
              flush=True)
        print('--' * 30, flush=True)

    return ave_acc
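# label_ranking_average_precision_score, used by the multi-label eval
# functions above, comes from sklearn and works on raw scores, which is why
# no normalization is applied before calling it. A small worked example of
# the batch-level call those functions make (values from the sklearn docs):
def _lrap_example():
    y_true = np.array([[1, 0, 0], [0, 0, 1]], dtype=np.uint8)  # binary indicator labels
    y_score = np.array([[0.75, 0.5, 1.0], [1.0, 0.2, 0.1]])    # raw model scores
    return label_ranking_average_precision_score(y_true, y_score)  # ~0.416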