len(train_loader) * NUM_EPOCH, loss=losses, top1=top1, top5=top5)) print("=" * 60) # perform validation & save checkpoints per epoch # validation statistics per epoch (buffer for visualization) print("=" * 60) print( "Perform Evaluation on LFW, CFP_FF, CFP_FP, AgeDB, CALFW, CPLFW and VGG2_FP, and Save Checkpoints..." ) accuracy_lfw, best_threshold_lfw, roc_curve_lfw = perform_val( MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, lfw, lfw_issame) buffer_val(writer, "LFW", accuracy_lfw, best_threshold_lfw, roc_curve_lfw, batch + 1) accuracy_cfp_ff, best_threshold_cfp_ff, roc_curve_cfp_ff = perform_val( MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, cfp_ff, cfp_ff_issame) buffer_val(writer, "CFP_FF", accuracy_cfp_ff, best_threshold_cfp_ff, roc_curve_cfp_ff, batch + 1) accuracy_cfp_fp, best_threshold_cfp_fp, roc_curve_cfp_fp = perform_val( MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, cfp_fp, cfp_fp_issame) buffer_val(writer, "CFP_FP", accuracy_cfp_fp, best_threshold_cfp_fp, roc_curve_cfp_fp, batch + 1) accuracy_agedb, best_threshold_agedb, roc_curve_agedb = perform_val( MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, agedb, agedb_issame) buffer_val(writer, "AgeDB", accuracy_agedb, best_threshold_agedb, roc_curve_agedb, batch + 1)
def OneEpoch(epoch, train_loader, OPTIMIZER, DISP_FREQ, NUM_EPOCH_WARM_UP, NUM_BATCH_WARM_UP):
    """Train for one epoch, then run validation and save checkpoints.

    Relies on module-level state defined elsewhere in this file: BACKBONE,
    HEAD, LOSS, DEVICE, LR, NUM_EPOCH, writer, the validation sets and the
    checkpoint settings (MODEL_ROOT, BACKBONE_NAME, HEAD_NAME, MULTI_GPU).

    Args:
        epoch: Zero-based epoch index; reported and logged as ``epoch + 1``.
        train_loader: Iterable yielding ``(inputs, labels)`` batches. It must
            support ``len()`` for the progress line.
        OPTIMIZER: Optimizer that is stepped once per batch.
        DISP_FREQ: Print running training statistics every DISP_FREQ batches.
        NUM_EPOCH_WARM_UP: Number of leading epochs during which warm-up may
            be applied.
        NUM_BATCH_WARM_UP: Warm-up length in batches, forwarded to
            ``warm_up_lr``.

    Returns:
        None. Results are printed, written to ``writer`` and saved as
        checkpoint files under MODEL_ROOT.
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # Number of batches already processed in THIS epoch (restarts at 0 on
    # every call).
    batch = 0
    # NOTE(review): the models are not switched to train mode here. If
    # perform_val leaves BACKBONE in eval mode, the caller must call
    # BACKBONE.train() / HEAD.train() before each epoch -- confirm.
    start = time.time()
    for inputs, labels in train_loader:
        # NOTE(review): `batch` is per-epoch, so when NUM_EPOCH_WARM_UP > 1 the
        # warm-up step counter restarts every epoch -- confirm this is intended.
        if (epoch + 1 <= NUM_EPOCH_WARM_UP) and (batch + 1 <= NUM_BATCH_WARM_UP):
            # adjust LR for each training batch during warm up
            warm_up_lr(batch + 1, NUM_BATCH_WARM_UP, LR, OPTIMIZER)

        # compute output
        inputs = inputs.to(DEVICE, non_blocking=True)
        labels = labels.to(DEVICE, non_blocking=True).long()
        features = BACKBONE(inputs)
        outputs = HEAD(features, labels)
        loss = LOSS(outputs, labels)

        # measure accuracy and record loss; detach() replaces the legacy
        # `.data` accessor so the metrics stay outside the autograd graph
        prec1, prec5 = accuracy(outputs.detach(), labels, topk=(1, 5))
        batch_size = inputs.size(0)
        losses.update(loss.item(), batch_size)
        top1.update(prec1.item(), batch_size)
        top5.update(prec5.item(), batch_size)

        # compute gradient and do SGD step
        OPTIMIZER.zero_grad()
        loss.backward()
        OPTIMIZER.step()

        # display training loss & acc every DISP_FREQ batches
        if ((batch + 1) % DISP_FREQ == 0) and batch != 0:
            print("=" * 60)
            # `batch` counts within this epoch, so the total is the number of
            # batches per epoch, not len(train_loader) * NUM_EPOCH.
            print('Epoch {}/{} Batch {}/{}\t'
                  'Training Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Training Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch + 1, NUM_EPOCH, batch + 1, len(train_loader),
                      loss=losses, top1=top1, top5=top5))
            # The timing window is DISP_FREQ batches, so report that number
            # rather than a hard-coded 100.
            print("Running speed in the last {} batches: {:.3f} iter/s.".format(
                DISP_FREQ, DISP_FREQ / (time.time() - start)))
            start = time.time()
            print("=" * 60)

        batch += 1

    # training statistics per epoch (buffer for visualization)
    epoch_loss = losses.avg
    epoch_acc = top1.avg
    writer.add_scalar("Training_Loss", epoch_loss, epoch + 1)
    writer.add_scalar("Training_Accuracy", epoch_acc, epoch + 1)
    print("=" * 60)
    print('Epoch: {}/{}\t'
          'Training Loss {loss.val:.4f} ({loss.avg:.4f})\t'
          'Training Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
          'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
              epoch + 1, NUM_EPOCH, loss=losses, top1=top1, top5=top5))
    print("=" * 60)

    # validation statistics per epoch (buffer for visualization).
    # Only the sets listed here are evaluated; the banner is built from the
    # same list so the log cannot claim benchmarks that were not run.
    val_sets = (
        ("LFW", lfw, lfw_issame),
        ("VGGFace2_FP", vgg2_fp, vgg2_fp_issame),
    )
    print("=" * 60)
    print("Perform Evaluation on {}, and Save Checkpoints...".format(
        ", ".join(name for name, _, _ in val_sets)))
    for name, data_set, issame in val_sets:
        # Do not call this local `accuracy`: that would shadow the
        # module-level accuracy() used in the training loop above.
        val_acc, best_threshold, roc_curve = perform_val(
            MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, data_set, issame)
        buffer_val(writer, name, val_acc, best_threshold, roc_curve, epoch + 1)
    print("=" * 60)

    # save checkpoints per epoch; one timestamp is shared so the Backbone and
    # Head files of the same checkpoint always carry matching names
    timestamp = get_time()
    # With MULTI_GPU the wrapped model is reached through `.module`.
    backbone_state = BACKBONE.module.state_dict() if MULTI_GPU else BACKBONE.state_dict()
    torch.save(
        backbone_state,
        os.path.join(
            MODEL_ROOT,
            "Backbone_{}_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth".format(
                BACKBONE_NAME, epoch + 1, batch, timestamp)))
    torch.save(
        HEAD.state_dict(),
        os.path.join(
            MODEL_ROOT,
            "Head_{}_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth".format(
                HEAD_NAME, epoch + 1, batch, timestamp)))
if ( (batch + 1) % VER_FREQ == 0 ) and batch != 0: #perform validation & save checkpoints (buffer for visualization) for params in OPTIMIZER.param_groups: lr = params['lr'] break print("Learning rate %f" % lr) print("Perform Evaluation on", TARGET, ", and Save Checkpoints...") acc = [] for ver in vers: name, data_set, issame = ver accuracy, std, xnorm, best_threshold, roc_curve = perform_val( MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, data_set, issame) buffer_val(writer, name, accuracy, std, xnorm, best_threshold, roc_curve, batch + 1) print('[%s][%d]XNorm: %1.5f' % (name, batch + 1, xnorm)) print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (name, batch + 1, accuracy, std)) print('[%s][%d]Best-Threshold: %1.5f' % (name, batch + 1, best_threshold)) acc.append(accuracy) # save checkpoints per epoch if need_save(acc, highest_acc): if MULTI_GPU: torch.save( BACKBONE.module.state_dict(), os.path.join( WORK_PATH, "Backbone_{}_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth"
print("=" * 60) print( "Perform Evaluation on LFW, CFP_FF, CFP_FP, AgeDB, CALFW, CPLFW and VGG2_FP, and Save Checkpoints..." ) # accuracy_lfw, best_threshold_lfw, roc_curve_lfw = perform_val(MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, lfw, lfw_issame) # buffer_val(writer, "LFW", accuracy_lfw, best_threshold_lfw, roc_curve_lfw, epoch + 1) # accuracy_cfp_ff, best_threshold_cfp_ff, roc_curve_cfp_ff = perform_val(MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, cfp_ff, cfp_ff_issame) # buffer_val(writer, "CFP_FF", accuracy_cfp_ff, best_threshold_cfp_ff, roc_curve_cfp_ff, epoch + 1) # accuracy_cfp_fp, best_threshold_cfp_fp, roc_curve_cfp_fp = perform_val(MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, cfp_fp, cfp_fp_issame) # buffer_val(writer, "CFP_FP", accuracy_cfp_fp, best_threshold_cfp_fp, roc_curve_cfp_fp, epoch + 1) # accuracy_agedb, best_threshold_agedb, roc_curve_agedb = perform_val(MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, agedb, agedb_issame) # buffer_val(writer, "AgeDB", accuracy_agedb, best_threshold_agedb, roc_curve_agedb, epoch + 1) accuracy_calfw, best_threshold_calfw, roc_curve_calfw = perform_val( MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, calfw, calfw_issame) buffer_val(writer, "CALFW", accuracy_calfw, best_threshold_calfw, roc_curve_calfw, epoch + 1) accuracy_cplfw, best_threshold_cplfw, roc_curve_cplfw = perform_val( MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, cplfw, cplfw_issame) buffer_val(writer, "CPLFW", accuracy_cplfw, best_threshold_cplfw, roc_curve_cplfw, epoch + 1) # accuracy_vgg2_fp, best_threshold_vgg2_fp, roc_curve_vgg2_fp = perform_val(MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, vgg2_fp, vgg2_fp_issame) # buffer_val(writer, "VGGFace2_FP", accuracy_vgg2_fp, best_threshold_vgg2_fp, roc_curve_vgg2_fp, epoch + 1) # print("Epoch {}/{}, Evaluation: LFW Acc: {}, CFP_FF Acc: {}, CFP_FP Acc: {}, AgeDB Acc: {}, CALFW Acc: {}, CPLFW Acc: {}, VGG2_FP Acc: 
{}".format(epoch + 1, NUM_EPOCH, accuracy_lfw, accuracy_cfp_ff, accuracy_cfp_fp, accuracy_agedb, accuracy_calfw, accuracy_cplfw, accuracy_vgg2_fp)) # print("=" * 60) print("Epoch {}/{}, Evaluation: CALFW Acc: {}, CPLFW Acc: {}".format( epoch + 1, NUM_EPOCH, accuracy_calfw, accuracy_cplfw)) print("=" * 60) # save checkpoints per epoch if MULTI_GPU:
print("During Warm Up Process, Epoch {}/{}".format( epoch, NUM_EPOCH_WARM_UP - 1)) print("Epoch {}/{}, Training Loss {} Acc {}".format( epoch, NUM_EPOCH - 1, epoch_loss, epoch_acc)) print("=" * 60) # validation statistics per epoch (buffer for visualization) print("=" * 60) if epoch <= NUM_EPOCH_WARM_UP - 1: print("During Warm Up Process, Epoch {}/{}".format( epoch, NUM_EPOCH_WARM_UP - 1)) print("Perform Validation on AgeDB_30, LFW and CFP_FP...") accuracy_agedb_30, best_threshold_agedb_30, roc_curve_agedb_30 = perform_val( MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, agedb_30, agedb_30_issame) buffer_val(writer, "AgeDB_30", accuracy_agedb_30, best_threshold_agedb_30, roc_curve_agedb_30, epoch) accuracy_lfw, best_threshold_lfw, roc_curve_lfw = perform_val( MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, lfw, lfw_issame) buffer_val(writer, "LFW", accuracy_lfw, best_threshold_lfw, roc_curve_lfw, epoch) accuracy_cfp_fp, best_threshold_cfp_fp, roc_curve_cfp_fp = perform_val( MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, cfp_fp, cfp_fp_issame) buffer_val(writer, "CFP_FP", accuracy_cfp_fp, best_threshold_cfp_fp, roc_curve_cfp_fp, epoch) print( "Epoch {}/{}, Evaluation: AgeDB_30 Acc: {}, LFW Acc: {}, CFP_FP Acc: {}" .format(epoch, NUM_EPOCH - 1, accuracy_agedb_30, accuracy_lfw, accuracy_cfp_fp)) print("=" * 60)
'Training Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Training Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( epoch + 1, NUM_EPOCH, loss=losses, top1=top1, top5=top5)) print("=" * 60) # perform validation & save checkpoints per epoch # validation statistics per epoch (buffer for visualization) print("=" * 60) print( "Perform Evaluation on LFW, CFP_FF, CFP_FP, AgeDB, CALFW, CPLFW and VGG2_FP, and Save Checkpoints..." ) accuracy_lfw, best_threshold_lfw, roc_curve_lfw = perform_val( MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, lfw, lfw_issame) buffer_val(writer, "LFW", accuracy_lfw, best_threshold_lfw, roc_curve_lfw, epoch + 1) #print("Epoch {}/{}, Evaluation: LFW Acc: {}, CFP_FF Acc: {}, CFP_FP Acc: {}, AgeDB Acc: {}, CALFW Acc: {}, CPLFW Acc: {}, VGG2_FP Acc: {}".format(epoch + 1, NUM_EPOCH, accuracy_lfw, accuracy_cfp_ff, accuracy_cfp_fp, accuracy_agedb, accuracy_calfw, accuracy_cplfw, accuracy_vgg2_fp)) print("Epoch {}/{}, Evaluation: LFW Acc: {}".format( epoch + 1, NUM_EPOCH, accuracy_lfw)) print("=" * 60) # save checkpoints per epoch if MULTI_GPU: torch.save( BACKBONE.module.state_dict(), os.path.join( MODEL_ROOT, "Backbone_{}_Epoch_{}_Batch_{}_Time_{}_checkpoint.pth". format(BACKBONE_NAME, epoch + 1, batch, get_time()))) torch.save( HEAD.state_dict(),
# dispaly training loss & acc every DISP_FREQ if batch % 2000 == 0 and batch != 0: print("=" * 60) print('Epoch {}/{} Batch {}/{}\t' 'Training Loss {loss.val:.4f} ({loss.avg:.4f})\t' 'Training Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 'Training Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( epoch + 1, NUM_EPOCH, batch + 1, len(train_loader) * NUM_EPOCH, loss=losses, top1=top1, top5=top5)) print("=" * 60) # perform validation & save checkpoints per epoch # validation statistics per epoch (buffer for visualization) print("=" * 60) print("Perform Evaluation on LFW, CFP_FF, CFP_FP, AgeDB, CALFW, CPLFW and VGG2_FP, and Save Checkpoints...") accuracy_lfw, best_threshold_lfw, roc_curve_lfw = perform_val(MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, lfw, lfw_issame) buffer_val(writer, "LFW", accuracy_lfw, best_threshold_lfw, roc_curve_lfw, batch + 1) accuracy_cfp_ff, best_threshold_cfp_ff, roc_curve_cfp_ff = perform_val(MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, cfp_ff, cfp_ff_issame) buffer_val(writer, "CFP_FF", accuracy_cfp_ff, best_threshold_cfp_ff, roc_curve_cfp_ff, batch + 1) accuracy_cfp_fp, best_threshold_cfp_fp, roc_curve_cfp_fp = perform_val(MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, cfp_fp, cfp_fp_issame) buffer_val(writer, "CFP_FP", accuracy_cfp_fp, best_threshold_cfp_fp, roc_curve_cfp_fp, batch + 1) accuracy_agedb, best_threshold_agedb, roc_curve_agedb = perform_val(MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, agedb, agedb_issame) buffer_val(writer, "AgeDB", accuracy_agedb, best_threshold_agedb, roc_curve_agedb, batch + 1) accuracy_calfw, best_threshold_calfw, roc_curve_calfw = perform_val(MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, calfw, calfw_issame) buffer_val(writer, "CALFW", accuracy_calfw, best_threshold_calfw, roc_curve_calfw, batch + 1) accuracy_cplfw, best_threshold_cplfw, roc_curve_cplfw = perform_val(MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, cplfw, cplfw_issame) 
buffer_val(writer, "CPLFW", accuracy_cplfw, best_threshold_cplfw, roc_curve_cplfw, batch + 1) accuracy_vgg2_fp, best_threshold_vgg2_fp, roc_curve_vgg2_fp = perform_val(MULTI_GPU, DEVICE, EMBEDDING_SIZE, BATCH_SIZE, BACKBONE, vgg2_fp, vgg2_fp_issame) buffer_val(writer, "VGGFace2_FP", accuracy_vgg2_fp, best_threshold_vgg2_fp, roc_curve_vgg2_fp, batch + 1) print("Batch {}/{}, Evaluation: LFW Acc: {}, CFP_FF Acc: {}, CFP_FP Acc: {}, AgeDB Acc: {}, CALFW Acc: {}, CPLFW Acc: {}, VGG2_FP Acc: {}".format(batch + 1, len(train_loader) * NUM_EPOCH, accuracy_lfw, accuracy_cfp_ff, accuracy_cfp_fp, accuracy_agedb, accuracy_calfw, accuracy_cplfw, accuracy_vgg2_fp)) print("=" * 60)