def inference(cfg, model, val_loader, num_query):
    """Run the evaluator on ``val_loader`` and log plain, re-ranked and track results.

    Args:
        cfg: Config object; ``MODEL.DEVICE``, ``TEST.RE_RANKING`` and
            ``TEST.FEAT_NORM`` are read.
        model: Model handed to ``create_supervised_evaluator``.
        val_loader: Loader the evaluator is run on.
        num_query: Forwarded as the first argument of every metric.

    Raises:
        ValueError: If ``cfg.TEST.RE_RANKING`` is neither ``'no'`` nor ``'yes'``
            (previously this only printed a message and then failed on an
            unbound ``evaluator``).
    """
    device = cfg.MODEL.DEVICE
    logger = logging.getLogger("reid_baseline.inference")
    logger.info("Enter inferencing")

    re_ranking = cfg.TEST.RE_RANKING
    if re_ranking == 'no':
        print("Create evaluator")
    elif re_ranking == 'yes':
        print("Create evaluator for reranking")
    else:
        raise ValueError(
            "Unsupported re_ranking config. Only support for no or yes, but got {}."
            .format(re_ranking))

    # Both supported settings attach exactly the same three metrics.
    feat_norm = cfg.TEST.FEAT_NORM
    evaluator = create_supervised_evaluator(
        model,
        metrics={
            'r1_mAP': R1_mAP(num_query, max_rank=100, feat_norm=feat_norm),
            'R1_mAP_reranking': R1_mAP_reranking(num_query, max_rank=100, feat_norm=feat_norm),
            'Track_R1_mAP': Track_R1_mAP(num_query, max_rank=100, feat_norm=feat_norm),
        },
        device=device)

    evaluator.run(val_loader)
    results = evaluator.state.metrics
    cmc, mAP = results['r1_mAP']
    re_cmc, re_mAP = results['R1_mAP_reranking']
    track_cmc, track_mAP = results['Track_R1_mAP']

    logger.info('Validation Results')
    report = (("mAP", mAP, cmc), ("re_mAP", re_mAP, re_cmc), ("track_mAP", track_mAP, track_cmc))
    for label, mean_ap, curve in report:
        logger.info("{}: {:.1%}".format(label, mean_ap))
        for r in [1, 5, 10, 100]:
            # curve[r - 1] is reported as Rank-r
            logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, curve[r - 1]))
def get_valid_eval_map_ebll(self, cfg, source_model, current_model):
    """Populate ``self.validation_evaluator_map`` with one evaluator per validation set.

    Every entry of ``self.valid_dict`` except the last one gets an evaluator from
    ``create_source_evaluator(source_model, current_model, ...)``; the last entry
    gets ``create_supervised_evaluator(current_model, ...)``. The metric is
    ``R1_mAP_reranking`` when ``self.re_ranking`` is truthy, otherwise ``R1_mAP``.

    Args:
        cfg: Config object; ``TEST.IF_FEAT_NORM`` is read.
        source_model: First model passed to ``create_source_evaluator``.
        current_model: Model passed to both evaluator factories.

    Returns:
        None. The result is stored on ``self.validation_evaluator_map``, which is
        left empty when ``self.valid_dict`` is empty (this used to raise
        ``IndexError``).
    """
    self.validation_evaluator_map = OrderedDict()
    entries = list(self.valid_dict.items())
    if not entries:
        return

    metric_cls = R1_mAP_reranking if self.re_ranking else R1_mAP

    def build_metrics(n_q):
        # Single place for the metric dict both factories receive.
        return {
            "r1_mAP": metric_cls(n_q, max_rank=50, if_feat_norm=cfg.TEST.IF_FEAT_NORM)
        }

    *leading, (last_name, (_, last_n_q)) = entries
    for name, (_, n_q) in leading:
        self.validation_evaluator_map[name] = create_source_evaluator(
            source_model,
            current_model,
            metrics=build_metrics(n_q),
            device=self.device,
            classify_feature=self.classify_feature)

    self.validation_evaluator_map[last_name] = create_supervised_evaluator(
        current_model,
        metrics=build_metrics(last_n_q),
        device=self.device,
        classify_feature=self.classify_feature)
def main():
    """Load the saved ``Content_Encoder`` weights and log mAP / CMC on the target validation loader.

    Reads the module-level ``cfg`` and ``working_dir``. The checkpoint is taken
    from ``logs/0214_duke2market/duke2market-new.pth.tar`` under ``working_dir``.
    """
    logger = setup_logger("duke2market", cfg.OUTPUT_DIR, 0, '0214test')
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.MODEL.DEVICE_ID
    cudnn.benchmark = True

    # ----load dataset------ #
    # Only the class count and the validation loader / query count are used here.
    _, _, _, num_classes = make_data_loader(cfg)
    _, val_loader, num_query, _ = make_data_loader_target(cfg)
    cfg.DATASETS.NUM_CLASSES_S = num_classes

    my_model = Base_model(cfg, logger)
    encoder = my_model.Content_Encoder

    # Evaluator
    if cfg.TEST.RE_RANKING == 'no':
        # NOTE(review): feat_norm is hard-coded to 'yes' on this path only.
        metric = R1_mAP(num_query, max_rank=50, feat_norm='yes')
    else:
        metric = R1_mAP_reranking(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
    evaluator = create_supervised_evaluator(encoder, metrics={'r1_mAP': metric}, device='cuda')

    # ---------------------test------------------------#
    checkpoint_path = osp.join(working_dir, 'logs/0214_duke2market/duke2market-new.pth.tar')
    model_checkpoint = load_checkpoint(checkpoint_path)
    my_model.Content_Encoder.module.load_state_dict(model_checkpoint['Content_Encoder'])

    logger.info("=> Training on {} and Testing on {}".format(cfg.DATASETS.NAMES, cfg.DATASETS.TNAMES))
    print("=> start testing. Please wait...")
    evaluator.run(val_loader)

    cmc, mAP = evaluator.state.metrics['r1_mAP']
    logger.info("mAP: {:.1%}".format(mAP))
    for rank in (1, 5, 10, 20, 30, 50):
        logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(rank, cmc[rank - 1]))
    logger.info("finished!")
def inference(cfg, model, val_loader, num_query):
    """Build the evaluator selected by ``cfg.TEST.RE_RANKING`` and run it on ``val_loader``.

    Args:
        cfg: Config object; ``MODEL.DEVICE``, ``TEST.RE_RANKING`` and
            ``TEST.FEAT_NORM`` are read.
        model: Model handed to ``create_supervised_evaluator``.
        val_loader: Loader the evaluator is run on.
        num_query: Forwarded as the first argument of the metric.

    Raises:
        ValueError: If ``cfg.TEST.RE_RANKING`` is neither ``'no'`` nor ``'yes'``
            (previously a message was printed and the run then failed on an
            unbound ``evaluator``).
    """
    device = cfg.MODEL.DEVICE
    logger = logging.getLogger("reid_baseline.inference")
    logger.info("Enter inferencing")

    re_ranking = cfg.TEST.RE_RANKING
    if re_ranking == 'no':
        print("Create evaluator")
        metric_cls = R1_mAP
    elif re_ranking == 'yes':
        print("Create evaluator for reranking")
        metric_cls = R1_mAP_reranking
    else:
        raise ValueError(
            "Unsupported re_ranking config. Only support for no or yes, but got {}."
            .format(re_ranking))

    evaluator = create_supervised_evaluator(
        model,
        metrics={
            'r1_mAP': metric_cls(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
        },
        device=device)
    evaluator.run(val_loader)
def inference(
        cfg,
        model,
        val_loader,
        num_query
):
    """Evaluate ``model`` on ``val_loader`` and log mAP and CMC at ranks 1, 5 and 10.

    The metric is ``R1_mAP_arm`` when ``cfg.TEST.WITH_ARM`` is truthy, otherwise
    ``R1_mAP``; ``with_arm`` is also forwarded to the evaluator factory.

    Args:
        cfg: Config object; ``MODEL.DEVICE``, ``TEST.WITH_ARM``,
            ``TEST.RE_RANKING`` and ``TEST.FEAT_NORM`` are read.
        model: Model handed to ``create_supervised_evaluator``.
        val_loader: Loader the evaluator is run on.
        num_query: Forwarded as the first argument of the metric.

    Raises:
        ValueError: If ``cfg.TEST.RE_RANKING`` is not ``'no'``. No other setting
            has an evaluator here, and the run used to fail on an unbound
            ``evaluator`` instead.
    """
    device = cfg.MODEL.DEVICE
    with_arm = cfg.TEST.WITH_ARM

    logger = logging.getLogger("reid_baseline.inference")
    logger.info("Enter inferencing")

    if cfg.TEST.RE_RANKING != 'no':
        raise ValueError(
            "Unsupported re_ranking config. Only support for no, but got {}."
            .format(cfg.TEST.RE_RANKING))

    print("Create evaluator")
    metric_cls = R1_mAP_arm if with_arm else R1_mAP
    evaluator = create_supervised_evaluator(
        model,
        metrics={'r1_mAP': metric_cls(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)},
        device=device,
        with_arm=with_arm)

    evaluator.run(val_loader)
    cmc, mAP = evaluator.state.metrics['r1_mAP']
    logger.info('Validation Results')
    logger.info("mAP: {:.1%}".format(mAP))
    for r in [1, 5, 10]:
        logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))
def inference(
        cfg,
        model,
        val_loader,
        num_query,
):
    """Evaluate ``model`` on ``val_loader``, logging progress, and return summary scores.

    Args:
        cfg: Config object; ``MODEL.DEVICE``, ``SOLVER.LOG_PERIOD``,
            ``TEST.RE_RANKING`` and ``TEST.FEAT_NORM`` are read.
        model: Model handed to ``create_supervised_evaluator``.
        val_loader: Loader the evaluator is run on; its length is used for the
            progress message.
        num_query: Forwarded as the first argument of the metric.

    Returns:
        tuple: ``(mAP, cmc[0], cmc[4])`` taken from the ``'r1_mAP'`` metric.

    Raises:
        ValueError: If ``cfg.TEST.RE_RANKING`` is neither ``'no'`` nor ``'yes'``
            (previously this printed a message and then failed on an unbound
            ``evaluator``).
    """
    device = cfg.MODEL.DEVICE
    log_period = cfg.SOLVER.LOG_PERIOD

    logger = logging.getLogger("reid_baseline.inference")
    logger.info("Enter inferencing")

    re_ranking = cfg.TEST.RE_RANKING
    if re_ranking == 'no':
        print("Create evaluator")
        metric_cls = R1_mAP
    elif re_ranking == 'yes':
        print("Create evaluator for reranking")
        metric_cls = R1_mAP_reranking
    else:
        raise ValueError(
            "Unsupported re_ranking config. Only support for no or yes, but got {}."
            .format(re_ranking))

    evaluator = create_supervised_evaluator(
        model,
        metrics={
            'r1_mAP': metric_cls(num_query,
                                 max_rank=50,
                                 feat_norm=cfg.TEST.FEAT_NORM,
                                 remove_camera=True,
                                 extract_feat=True)
        },
        device=device)

    @evaluator.on(Events.ITERATION_COMPLETED)
    def log_iteration(engine):
        # 1-based position inside the current pass over val_loader
        batch_idx = (engine.state.iteration - 1) % len(val_loader) + 1
        if batch_idx % log_period == 0:
            logger.info("Extract Features. Iteration[{}/{}]".format(
                batch_idx, len(val_loader)))

    evaluator.run(val_loader)
    # The metric yields a distance matrix first; it is not used here.
    _distmat, cmc, mAP = evaluator.state.metrics['r1_mAP']
    logger.info('Validation Results')
    logger.info("mAP: {:.1%}".format(mAP))
    for r in [1, 5, 10]:
        logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))
    return mAP, cmc[0], cmc[4]
def inference(
        cfg,
        model,
        val_loader,
        num_query
):
    """Pick the evaluator variant for the configured test mode, run it and log mAP / CMC.

    Without re-ranking the evaluator factory depends on whether ``'test_all'``
    appears in ``cfg.TEST.TEST_MODE`` and on the length of the first dataset
    sample (4 or 6 fields select the mask variants). With re-ranking only the
    plain factories are used.

    Args:
        cfg: Config object; ``MODEL.DEVICE``, ``TEST.RE_RANKING``,
            ``TEST.TEST_MODE``, ``TEST.FEAT_NORM`` and ``INPUT.SEQ_LEN`` are read.
        model: Model handed to the evaluator factory.
        val_loader: Loader the evaluator is run on; ``val_loader.dataset.dataset[0]``
            is inspected when re-ranking is off.
        num_query: Forwarded as the first argument of the metric.

    Raises:
        ValueError: If ``cfg.TEST.RE_RANKING`` is neither ``'no'`` nor ``'yes'``
            (previously this printed a message and then failed on an unbound
            ``evaluator``).
    """
    device = cfg.MODEL.DEVICE

    logger = logging.getLogger("reid_baseline.inference")
    logger.info("Enter inferencing")

    re_ranking = cfg.TEST.RE_RANKING
    if re_ranking not in ('no', 'yes'):
        raise ValueError(
            "Unsupported re_ranking config. Only support for no or yes, but got {}."
            .format(re_ranking))

    test_all = 'test_all' in cfg.TEST.TEST_MODE
    feat_norm = cfg.TEST.FEAT_NORM

    if re_ranking == 'no':
        print("Create evaluator")
        sample_len = len(val_loader.dataset.dataset[0])
        if sample_len == 6:
            # mask, new eval
            metric = R1_mAP(num_query, max_rank=50, feat_norm=feat_norm, new_eval=True)
        else:
            metric = R1_mAP(num_query, max_rank=50, feat_norm=feat_norm)
        metrics = {'r1_mAP': metric}

        if test_all:
            if sample_len == 4:
                # mask, no new eval
                factory = create_supervised_all_evaluator_with_mask
            elif sample_len == 6:
                factory = create_supervised_all_evaluator_with_mask_new_eval
            else:
                factory = create_supervised_all_evaluator
            evaluator = factory(model, metrics=metrics, seq_len=cfg.INPUT.SEQ_LEN, device=device)
        else:
            if sample_len == 6:
                factory = create_supervised_evaluator_with_mask_new_eval
            elif sample_len == 4:
                # mask, no new eval
                factory = create_supervised_evaluator_with_mask
            else:
                factory = create_supervised_evaluator
            evaluator = factory(model, metrics=metrics, device=device)
    else:
        # haven't implement with mask
        print("Create evaluator for reranking")
        metrics = {'r1_mAP': R1_mAP_reranking(num_query, max_rank=50, feat_norm=feat_norm)}
        if test_all:
            evaluator = create_supervised_all_evaluator(
                model, metrics=metrics, seq_len=cfg.INPUT.SEQ_LEN, device=device)
        else:
            evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)

    pbar = ProgressBar(persist=True, ncols=120)
    pbar.attach(evaluator)

    evaluator.run(val_loader)
    cmc, mAP = evaluator.state.metrics['r1_mAP']
    logger.info('Validation Results')
    logger.info("mAP: {:.1%}".format(mAP))
    for r in [1, 5, 10]:
        logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))
def main(mode, ckpt, logger):
    """Train with ``two_classifier`` for 210 epochs, validating and checkpointing every 2nd epoch.

    Reads the module-level ``cfg``. Training only happens when ``mode == 'two'``;
    any other mode builds the model, evaluator and summary writer and then
    returns.

    Args:
        mode: Training mode selector; only ``'two'`` triggers training.
        ckpt: Name prefix; ``ckpt + '.pth.tar'`` is passed as ``info`` to
            ``save_checkpoint``.
        logger: Logger used for the config dump and validation results.

    Raises:
        ValueError: If ``mode == 'two'`` and ``cfg.DATASETS.NAMES`` is not one
            of ``'dukemtmc'``, ``'market1501'`` or ``'msmt17'``. This used to
            surface as an unbound ``rand_src_1`` on the first epoch.
    """
    logger.info(cfg)
    os.environ["CUDA_VISIBLE_DEVICES"] = cfg.MODEL.DEVICE_ID
    cudnn.benchmark = True

    # ----load dataset------ #
    train_loader_s, _, _, num_classes = make_data_loader(cfg)
    train_loader_t, val_loader, num_query, _ = make_data_loader_target(cfg)
    cfg.DATASETS.NUM_CLASSES_S = num_classes
    pj_model = BaseModel(cfg)

    # Evaluator
    if cfg.TEST.RE_RANKING == 'no':
        metric_cls = R1_mAP
    else:
        metric_cls = R1_mAP_reranking
    evaluator = create_supervised_evaluator(
        pj_model.Content_Encoder,
        metrics={'r1_mAP': metric_cls(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)},
        device='cuda')

    start_epoch = best_top1 = 0

    # Summary_writer
    writer = SummaryWriter()
    try:
        # Start training
        if mode == 'two':
            # Two disjoint index groups per source dataset, handed to two_classifier.
            # NOTE(review): presumably camera-id splits; confirm against two_classifier.
            if cfg.DATASETS.NAMES == 'dukemtmc':
                rand_src_1 = np.asarray([0, 2, 4, 6])
                rand_src_2 = np.asarray([1, 3, 5, 7])
            elif cfg.DATASETS.NAMES == 'market1501':
                rand_src_1 = np.asarray([0, 1, 4])
                rand_src_2 = np.asarray([3, 2, 5])
            elif cfg.DATASETS.NAMES == 'msmt17':
                rand_src_1 = np.asarray([1, 3, 4, 5, 9, 11, 13])
                rand_src_2 = np.asarray([0, 2, 6, 7, 8, 10, 12, 14])
            else:
                raise ValueError(
                    "Unsupported dataset for mode 'two'. Only support for dukemtmc, "
                    "market1501 or msmt17, but got {}.".format(cfg.DATASETS.NAMES))

            for epoch in range(start_epoch, 210):
                pj_model.two_classifier(epoch, train_loader_s, train_loader_t, writer, logger,
                                        rand_src_1, rand_src_2)

                if (epoch + 1) % 2 != 0:
                    continue

                evaluator.run(val_loader)
                cmc, mAP = evaluator.state.metrics['r1_mAP']
                logger.info("Validation Results - Epoch: {}".format(epoch))
                logger.info("mAP: {:.1%}".format(mAP))
                for r in [1, 5, 10, 20]:
                    logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))

                # Rank-1 accuracy decides which checkpoint counts as best.
                is_best = cmc[0] > best_top1
                best_top1 = max(cmc[0], best_top1)
                save_checkpoint({
                    'Content_encoder': pj_model.Content_Encoder.module.state_dict(),
                    'Content_optimizer': pj_model.Content_optimizer.state_dict(),
                    'Content_optimizer_fix': pj_model.Content_optimizer_fix.state_dict(),
                    'epoch': epoch + 1,
                    'best_top1': best_top1,
                }, is_best, fpath=cfg.OUTPUT_DIR + 'checkpoint.pth.tar', info=ckpt + '.pth.tar')

                logger.info('\n * Finished epoch {:3d}  top1: {:5.1%}  best: {:5.1%}{}\n'.
                            format(epoch, cmc[0], best_top1, ' *' if is_best else ''))
    finally:
        # Close the writer even when training aborts.
        writer.close()
def get_valid_eval_map(cfg, device, model, valid, re_ranking=False, classify_feature=True):
    """Return an ``OrderedDict`` mapping each validation-set name to its evaluator.

    Args:
        cfg: Config object; ``TEST.IF_FEAT_NORM`` is read for every entry.
        device: Forwarded to ``create_supervised_evaluator``.
        model: Model each evaluator wraps.
        valid: Mapping ``name -> (ignored, n_q)``; ``n_q`` is forwarded as the
            first argument of the metric.
        re_ranking: Use ``R1_mAP_reranking`` instead of ``R1_mAP`` when truthy.
        classify_feature: Forwarded to ``create_supervised_evaluator``.

    Returns:
        OrderedDict: Evaluators keyed by name, in the iteration order of ``valid``.
    """
    evaluators = OrderedDict()
    for set_name, pair in valid.items():
        _, n_q = pair
        metric_cls = R1_mAP_reranking if re_ranking else R1_mAP
        metric = metric_cls(n_q, max_rank=50, if_feat_norm=cfg.TEST.IF_FEAT_NORM)
        evaluators[set_name] = create_supervised_evaluator(
            model,
            metrics={"r1_mAP": metric},
            device=device,
            classify_feature=classify_feature)
    return evaluators
def inference(cfg, model, val_loader, num_query):
    """Evaluate ``model``, or dump a submission file when re-ranking is neither 'no' nor 'yes'.

    For ``cfg.TEST.RE_RANKING`` equal to ``'no'`` / ``'yes'`` the ``R1_mAP`` /
    ``R1_mAP_reranking`` metric is run and mAP plus CMC at ranks 1, 5 and 10 are
    logged. Any other value runs the ``Submit`` metric and writes its result to
    ``submit.json`` in the current working directory.

    Args:
        cfg: Config object; ``MODEL.DEVICE``, ``TEST.RE_RANKING`` and
            ``TEST.FEAT_NORM`` are read.
        model: Model handed to ``create_supervised_evaluator``.
        val_loader: Loader the evaluator is run on.
        num_query: Forwarded as the first argument of the metric.
    """
    device = cfg.MODEL.DEVICE
    logger = logging.getLogger("reid_baseline.inference")
    logger.info("Enter inferencing")

    re_ranking = cfg.TEST.RE_RANKING
    is_plain = re_ranking == 'no'
    if not is_plain and not re_ranking == 'yes':
        # Submission path: the metric result is serialised as JSON.
        submitter = create_supervised_evaluator(
            model,
            metrics={
                'submit': Submit(num_query, max_rank=200, feat_norm=cfg.TEST.FEAT_NORM)
            },
            device=device)
        submitter.run(val_loader)
        img_mat = submitter.state.metrics['submit']
        with open('submit.json', 'w') as out:
            json.dump(img_mat, out)
        return

    if is_plain:
        print("Create evaluator")
        metric = R1_mAP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
    else:
        print("Create evaluator for reranking")
        metric = R1_mAP_reranking(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)

    evaluator = create_supervised_evaluator(model, metrics={'r1_mAP': metric}, device=device)
    evaluator.run(val_loader)
    cmc, mAP = evaluator.state.metrics['r1_mAP']
    logger.info('Validation Results')
    logger.info("mAP: {:.1%}".format(mAP))
    for rank in (1, 5, 10):
        logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(rank, cmc[rank - 1]))
def inference(cfg, model, val_loader, num_query):
    """Run an ``R1_mAP`` evaluator over ``val_loader`` and log mAP and CMC at ranks 1, 5, 10.

    Args:
        cfg: Config object; only ``MODEL.DEVICE`` is read.
        model: Model handed to ``create_supervised_evaluator``.
        val_loader: Loader the evaluator is run on.
        num_query: Sole argument of the ``R1_mAP`` metric.
    """
    device = cfg.MODEL.DEVICE

    logger = logging.getLogger("reid_baseline.inference")
    logger.info("Start inferencing")

    metrics = {'r1_mAP': R1_mAP(num_query)}
    evaluator = create_supervised_evaluator(model, metrics=metrics, device=device)
    evaluator.run(val_loader)

    cmc, mAP = evaluator.state.metrics['r1_mAP']
    logger.info('Validation Results')
    logger.info("mAP: {:.1%}".format(mAP))
    for rank in (1, 5, 10):
        # cmc is 0-indexed, so Rank-r lives at position r - 1
        score = cmc[rank - 1]
        logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(rank, score))
def validator(cfg, model, camera_model, val_loader, num_query):
    """Evaluate ``model`` together with ``camera_model`` and log mAP plus CMC up to rank 50.

    Args:
        cfg: Config object; ``MODEL.DEVICE``, ``SOLVER.LOG_PERIOD``,
            ``TEST.RE_RANKING`` and ``TEST.FEAT_NORM`` are read.
        model: First model handed to ``create_supervised_evaluator``.
        camera_model: Second model handed to ``create_supervised_evaluator``.
        val_loader: Loader the evaluator is run on.
        num_query: Forwarded as the first argument of the metric.

    Raises:
        ValueError: If ``cfg.TEST.RE_RANKING`` is neither ``'no'`` nor ``'yes'``
            (previously this printed a message and then failed on an unbound
            ``evaluator``).
    """
    device = cfg.MODEL.DEVICE
    log_period = cfg.SOLVER.LOG_PERIOD

    logger = logging.getLogger("reid_baseline.inference")
    logger.info("Enter inferencing")

    re_ranking = cfg.TEST.RE_RANKING
    if re_ranking == 'no':
        print("Create evaluator")
        # NOTE(review): max_rank is not passed on this path although Rank-50 is
        # logged below; this relies on R1_mAP's default. Confirm it is >= 50.
        metric = R1_mAP(num_query, True, False, feat_norm=cfg.TEST.FEAT_NORM)
    elif re_ranking == 'yes':
        print("Create evaluator for reranking")
        metric = R1_mAP_reranking(num_query, True, False, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
    else:
        raise ValueError(
            "Unsupported re_ranking config. Only support for no or yes, but got {}."
            .format(re_ranking))

    evaluator = create_supervised_evaluator(
        model, camera_model, metrics={'r1_mAP': metric}, device=device)

    @evaluator.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        # 1-based position inside the current pass over val_loader
        batch_idx = (engine.state.iteration - 1) % len(val_loader) + 1
        if batch_idx % log_period == 0:
            logger.info("Epoch[{}] Iter[{}/{}]".format(engine.state.epoch, batch_idx, len(val_loader)))

    evaluator.run(val_loader)
    cmc, mAP = evaluator.state.metrics['r1_mAP']
    logger.info('Validation Results')
    logger.info("mAP: {:.1%}".format(mAP))
    for r in [1, 5, 10, 20, 50]:
        logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))
def inference(cfg, model, val_loader, num_query):
    """Evaluate ``model``, printing each iteration's output, and log mAP / CMC.

    Args:
        cfg: Config object; ``MODEL.DEVICE``, ``TEST.RE_RANKING`` and
            ``TEST.FEAT_NORM`` are read.
        model: Model handed to ``create_supervised_evaluator``.
        val_loader: Loader the evaluator is run on.
        num_query: Forwarded as the first argument of the metric.

    Raises:
        ValueError: If ``cfg.TEST.RE_RANKING`` is neither ``'no'`` nor ``'yes'``
            (previously this printed a message and then failed on an unbound
            ``evaluator``).
    """
    device = cfg.MODEL.DEVICE
    logger = logging.getLogger("reid_baseline.inference")
    logger.info("Enter inferencing")

    re_ranking = cfg.TEST.RE_RANKING
    if re_ranking == 'no':
        print("Create evaluator")
        metric_cls = R1_mAP
    elif re_ranking == 'yes':
        print("Create evaluator for reranking")
        metric_cls = R1_mAP_reranking
    else:
        raise ValueError(
            "Unsupported re_ranking config. Only support for no or yes, but got {}."
            .format(re_ranking))

    evaluator = create_supervised_evaluator(
        model,
        metrics={
            'r1_mAP': metric_cls(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
        },
        device=device)

    @evaluator.on(Events.ITERATION_COMPLETED)
    def update(engine):
        # Dump the raw per-iteration output of the evaluator to stdout.
        print(engine.state.output)

    @evaluator.on(Events.EPOCH_COMPLETED)
    def calc_heatmap(engine):
        # Placeholder handler; intentionally does nothing.
        pass

    evaluator.run(val_loader)
    cmc, mAP = evaluator.state.metrics['r1_mAP']
    logger.info('Validation Results')
    logger.info("mAP: {:.1%}".format(mAP))
    for r in [1, 5, 10]:
        logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))
def inference(
        cfg,
        model,
        val_loader,
        num_query
):
    """Create the ``R1_mAP`` evaluator for the configured test type when re-ranking is off.

    Args:
        cfg: Config object; ``MODEL.DEVICE``, ``TEST.TYPE``, ``TEST.RE_RANKING``
            and ``TEST.FEAT_NORM`` are read.
        model: Model handed to ``create_supervised_evaluator``.
        val_loader: Not used in the visible part of this function.
        num_query: Forwarded as the first argument of the metric.
    """
    device = cfg.MODEL.DEVICE

    logger = logging.getLogger("reid_baseline.inference")
    logger.info("Enter inferencing")

    test_type = cfg.TEST.TYPE
    print('cfg.TEST.TYPE:', test_type)

    if cfg.TEST.RE_RANKING == 'no':
        print("Create evaluator")
        metric = R1_mAP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM, which_type=cfg.TEST.TYPE)
        evaluator = create_supervised_evaluator(model, metrics={'r1_mAP': metric}, device=device)
    # The 'yes' (re-ranking) branch is disabled: it would build the same
    # evaluator with R1_mAP_reranking in place of R1_mAP.
def get_valid_eval_map(self, cfg, model):
    """Rebuild ``self.validation_evaluator_map`` with one evaluator per entry of ``self.valid_dict``.

    Args:
        cfg: Config object; ``TEST.IF_FEAT_NORM`` is read for every entry.
        model: Model each evaluator wraps.

    The metric is ``R1_mAP_reranking`` when ``self.re_ranking`` is truthy,
    otherwise ``R1_mAP``; ``self.device`` and ``self.classify_feature`` are
    forwarded to ``create_supervised_evaluator``.
    """
    self.validation_evaluator_map = OrderedDict()
    for set_name, pair in self.valid_dict.items():
        _, n_q = pair
        metric_cls = R1_mAP_reranking if self.re_ranking else R1_mAP
        metrics = {
            "r1_mAP": metric_cls(n_q, max_rank=50, if_feat_norm=cfg.TEST.IF_FEAT_NORM)
        }
        self.validation_evaluator_map[set_name] = create_supervised_evaluator(
            model,
            metrics=metrics,
            device=self.device,
            classify_feature=self.classify_feature)
def inference(cfg, model, val_loader, num_query):
    """Create the evaluator for the configured re-ranking mode, then plot fixed sample cases.

    The evaluator is only constructed here, never run. The plotting step
    rebuilds the query + gallery dataset with a ``ToTensor`` transform and calls
    ``plot`` with hard-coded index triples under the title ``'good_case'``.

    Args:
        cfg: Config object; ``MODEL.DEVICE``, ``TEST.RE_RANKING``,
            ``TEST.FEAT_NORM``, ``DATASETS.NAMES`` and ``DATASETS.ROOT_DIR`` are read.
        model: Model handed to ``create_supervised_evaluator``.
        val_loader: Not used by this function.
        num_query: Forwarded as the first argument of the metric.

    Raises:
        ValueError: If ``cfg.TEST.RE_RANKING`` is neither ``'no'`` nor ``'yes'``
            (previously this only printed a message and carried on).
    """
    device = cfg.MODEL.DEVICE
    logger = logging.getLogger("reid_baseline.inference")
    logger.info("Enter inferencing")

    re_ranking = cfg.TEST.RE_RANKING
    if re_ranking == 'no':
        print("Create evaluator")
        # Only the plain metric takes the plotting evaluation function.
        metric = R1_mAP(num_query,
                        max_rank=50,
                        feat_norm=cfg.TEST.FEAT_NORM,
                        fun=eval_func_with_plot)
    elif re_ranking == 'yes':
        print("Create evaluator for reranking")
        metric = R1_mAP_reranking(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
    else:
        raise ValueError(
            "Unsupported re_ranking config. Only support for no or yes, but got {}."
            .format(re_ranking))

    evaluator = create_supervised_evaluator(model, metrics={'r1_mAP': metric}, device=device)

    dataset = init_dataset(cfg.DATASETS.NAMES, root=cfg.DATASETS.ROOT_DIR)
    transform = T.Compose([T.ToTensor()])
    val_dataset = ImageDataset(dataset.query + dataset.gallery, transform)
    # NOTE(review): the index triples are hard-coded; presumably they refer to
    # positions in query + gallery of one specific dataset. Confirm before reuse.
    plot(val_dataset, 'good_case', [[16843, 3918, 6980], [7005, 4783, 15962]])
def do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,
        loss_fn,
        num_query,
        start_epoch,
        time
):
    """Train ``model`` with periodic checkpointing and optional validation.

    ``cfg.MODEL.Evaluate`` selects what is validated every
    ``cfg.SOLVER.EVAL_PERIOD`` epochs: ``"ClothChangingSetting"``
    (``R1_mAP_longterm``), ``"StandardSetting"`` (``R1_mAP``), ``"both"`` (cloth
    changing first, then standard) or ``"skip"`` (no validation).

    Args:
        cfg: Config object; ``SOLVER.LOG_PERIOD``, ``SOLVER.CHECKPOINT_PERIOD``,
            ``SOLVER.EVAL_PERIOD``, ``SOLVER.MAX_EPOCHS``, ``OUTPUT_DIR``,
            ``MODEL.DEVICE``, ``MODEL.NAME``, ``MODEL.Evaluate`` and
            ``TEST.FEAT_NORM`` are read.
        model: Model being trained and evaluated.
        train_loader: Training loader; its length and ``batch_size`` are used
            for logging.
        val_loader: Loader each evaluator is run on.
        optimizer: Optimizer passed to the trainer and saved in checkpoints.
        scheduler: Stepped once at the start of every epoch.
        loss_fn: Loss passed to the trainer.
        num_query: Forwarded as the first argument of every metric.
        start_epoch: Value assigned to ``engine.state.epoch`` when training starts.
        time: Sub-directory name appended to ``cfg.OUTPUT_DIR`` for checkpoints.

    Raises:
        ValueError: If ``cfg.MODEL.Evaluate`` is not one of the four supported
            values.

    Note:
        Relies on a module-level ``ITER`` counter, mutated via ``global``.
    """
    import os

    log_period = cfg.SOLVER.LOG_PERIOD
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    eval_period = cfg.SOLVER.EVAL_PERIOD
    output_dir = cfg.OUTPUT_DIR
    device = cfg.MODEL.DEVICE
    epochs = cfg.SOLVER.MAX_EPOCHS

    logger = logging.getLogger("clothing change re-id")
    logger.info("Start training")
    trainer = create_supervised_trainer(model, optimizer, loss_fn, device=device)

    def make_longterm_evaluator():
        return create_supervised_evaluator(
            model,
            metrics={'r1_mAP_longterm': R1_mAP_longterm(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)},
            device=device)

    def make_standard_evaluator():
        return create_supervised_evaluator(
            model,
            metrics={'r1_mAP': R1_mAP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)},
            device=device)

    # Each entry is (log title, metric key, evaluator). Keeping them in one list
    # means every setting is validated through the same code path; the lone
    # cloth-changing setting previously indexed a single engine with
    # ``evaluator[0]`` and failed.
    eval_mode = cfg.MODEL.Evaluate
    if eval_mode == "ClothChangingSetting":
        print(">>evaluation is based on Cloth Changing Setting")
        validations = [
            ("Cloth Changing Validation Results", 'r1_mAP_longterm', make_longterm_evaluator()),
        ]
    elif eval_mode == "StandardSetting":
        print(">>evaluation is based on Standard Setting")
        validations = [
            ("Here Are the Validation Results", 'r1_mAP', make_standard_evaluator()),
        ]
    elif eval_mode == "both":
        print(">>evaluation is based on both settings: Standard Setting and Cloth Changing Setting")
        validations = [
            ("Cloth Changing Validation Results", 'r1_mAP_longterm', make_longterm_evaluator()),
            ("Standard Validation Results", 'r1_mAP', make_standard_evaluator()),
        ]
    elif eval_mode == "skip":
        validations = []
    else:
        raise ValueError(
            "Only support cfg.MODEL.Evaluate for 'ClothChangingSetting', "
            "'StandardSetting', 'both' and 'skip', but got {}".format(eval_mode))

    output_dir = os.path.join(output_dir, time)
    checkpointer = ModelCheckpoint(output_dir, cfg.MODEL.NAME, checkpoint_period, n_saved=10, require_empty=False)
    timer = Timer(average=True)

    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {'model': model,
                                                                     'optimizer': optimizer})
    timer.attach(trainer, start=Events.EPOCH_STARTED, resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED, step=Events.ITERATION_COMPLETED)

    # average metric to attach on trainer
    RunningAverage(output_transform=lambda x: x[0]).attach(trainer, 'avg_loss')
    RunningAverage(output_transform=lambda x: x[1]).attach(trainer, 'avg_acc')

    @trainer.on(Events.STARTED)
    def start_training(engine):
        engine.state.epoch = start_epoch

    @trainer.on(Events.EPOCH_STARTED)
    def adjust_learning_rate(engine):
        scheduler.step()

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        global ITER
        ITER += 1

        if ITER % log_period == 0:
            logger.info("Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, Base Lr: {:.2e}"
                        .format(engine.state.epoch, ITER, len(train_loader),
                                engine.state.metrics['avg_loss'], engine.state.metrics['avg_acc'],
                                scheduler.get_lr()[0]))
        # Reset the per-epoch counter once a full pass is done.
        if len(train_loader) == ITER:
            ITER = 0

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        logger.info('Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]'
                    .format(engine.state.epoch, timer.value() * timer.step_count,
                            train_loader.batch_size / timer.value()))
        logger.info('-' * 10)
        timer.reset()

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        if engine.state.epoch % eval_period != 0:
            return
        for title, metric_key, evaluator in validations:
            evaluator.run(val_loader)
            cmc, mAP = evaluator.state.metrics[metric_key]
            logger.info("{} - Epoch: {}".format(title, engine.state.epoch))
            logger.info("mAP: {:.1%}".format(mAP))
            for r in [1, 5, 10, 20, 50]:
                logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))

    trainer.run(train_loader, max_epochs=epochs)
def do_train_with_center(cfg, model, center_criterion, train_loader, val_loader,
                         optimizer, optimizer_center, scheduler, loss_fn,
                         num_query, start_epoch):
    """Train ``model`` with a center criterion, checkpointing and validating periodically.

    Every ``cfg.SOLVER.EVAL_PERIOD`` epochs the ``R1_mAP`` evaluator is run, the
    scores are logged, and one CSV row (epoch, mAP, CMC at ranks 1, 3, 5, 10,
    20, 50) is appended to a results file, which gets a header row the first
    time it is created.

    Args:
        cfg: Config object; ``SOLVER.LOG_PERIOD``, ``SOLVER.CHECKPOINT_PERIOD``,
            ``SOLVER.EVAL_PERIOD``, ``SOLVER.MAX_EPOCHS``,
            ``SOLVER.CENTER_LOSS_WEIGHT``, ``OUTPUT_DIR``, ``MODEL.DEVICE``,
            ``MODEL.NAME`` and ``TEST.FEAT_NORM`` are read.
        model: Model being trained and evaluated.
        center_criterion: Passed to the trainer and saved as ``'center_param'``.
        train_loader: Training loader; its length and ``batch_size`` are used
            for logging.
        val_loader: Loader the evaluator is run on.
        optimizer: Main optimizer; saved in checkpoints.
        optimizer_center: Optimizer for the center criterion; saved in checkpoints.
        scheduler: Stepped once at the start of every epoch.
        loss_fn: Loss passed to the trainer.
        num_query: Forwarded as the first argument of the metric.
        start_epoch: Value assigned to ``engine.state.epoch`` when training starts.

    Note:
        Relies on a module-level ``ITER`` counter, mutated via ``global``.
    """
    solver = cfg.SOLVER
    log_period = solver.LOG_PERIOD
    checkpoint_period = solver.CHECKPOINT_PERIOD
    eval_period = solver.EVAL_PERIOD
    output_dir = cfg.OUTPUT_DIR
    device = cfg.MODEL.DEVICE
    epochs = solver.MAX_EPOCHS

    logger = logging.getLogger("reid_baseline.train")
    logger.info("Start training")

    trainer = create_supervised_trainer_with_center(
        model,
        center_criterion,
        optimizer,
        optimizer_center,
        loss_fn,
        cfg.SOLVER.CENTER_LOSS_WEIGHT,
        device=device)
    eval_metric = R1_mAP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
    evaluator = create_supervised_evaluator(model, metrics={'r1_mAP': eval_metric}, device=device)

    checkpointer = ModelCheckpoint(output_dir,
                                   cfg.MODEL.NAME,
                                   checkpoint_period,
                                   n_saved=10,
                                   require_empty=False)
    timer = Timer(average=True)

    to_save = {
        'model': model,
        'optimizer': optimizer,
        'center_param': center_criterion,
        'optimizer_center': optimizer_center
    }
    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, to_save)
    timer.attach(trainer,
                 start=Events.EPOCH_STARTED,
                 resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 step=Events.ITERATION_COMPLETED)

    # Running averages read from the trainer output: index 0 -> loss, index 1 -> accuracy.
    RunningAverage(output_transform=lambda out: out[0]).attach(trainer, 'avg_loss')
    RunningAverage(output_transform=lambda out: out[1]).attach(trainer, 'avg_acc')

    @trainer.on(Events.STARTED)
    def start_training(engine):
        engine.state.epoch = start_epoch

    @trainer.on(Events.EPOCH_STARTED)
    def adjust_learning_rate(engine):
        scheduler.step()

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        global ITER
        ITER += 1

        if ITER % log_period == 0:
            running = engine.state.metrics
            logger.info(
                "Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, Base Lr: {:.2e}"
                .format(engine.state.epoch, ITER, len(train_loader),
                        running['avg_loss'], running['avg_acc'],
                        scheduler.get_lr()[0]))
        # Counter restarts after a full pass over the loader.
        if len(train_loader) == ITER:
            ITER = 0

    # Handlers below are registered with the `trainer.on` decorator API.
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        logger.info(
            'Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]'
            .format(engine.state.epoch,
                    timer.value() * timer.step_count,
                    train_loader.batch_size / timer.value()))
        logger.info('-' * 10)
        timer.reset()

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        if engine.state.epoch % eval_period != 0:
            return

        evaluator.run(val_loader)
        cmc, mAP = evaluator.state.metrics['r1_mAP']
        logger.info("Validation Results - Epoch: {}".format(
            engine.state.epoch))
        logger.info("mAP: {:.1%}".format(mAP))

        ranks = [1, 3, 5, 10, 20, 50]
        cmcs = []
        for rank in ranks:
            score = cmc[rank - 1]
            logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(rank, score))
            cmcs.append(score)

        # Keep a CSV of per-epoch results for quick accuracy checks.
        # NOTE(review): the file name is built from output_dir itself; confirm
        # that joining output_dir with '<output_dir>.txt' is intended.
        results_path = os.path.join(output_dir, '{}.txt'.format(output_dir))
        if not os.path.exists(results_path):
            header = ['Epoch', 'mAP'] + [str(rank) for rank in ranks]
            with open(results_path, 'w') as fh:
                fh.write(','.join(header))
                fh.write('\n')

        row = '{},{},{}\n'.format(engine.state.epoch, mAP,
                                  ','.join([str(score) for score in cmcs]))
        with open(results_path, 'a') as fh:
            fh.write(row)

    trainer.run(train_loader, max_epochs=epochs)
def inference(cfg, model, val_loader, num_query):
    """Run the cloth-changing and the standard evaluation and return both results.

    Two evaluators are built: one registered under ``'r1_mAP_longterm'``
    (``R1_mAP_longterm``, or ``R1_mAP_reranking`` when re-ranking is ``'yes'``)
    and one under ``'r1_mAP'`` (always ``R1_mAP``). They are run in that order.

    Args:
        cfg: Config object; ``MODEL.DEVICE``, ``TEST.RE_RANKING`` and
            ``TEST.FEAT_NORM`` are read.
        model: Model handed to ``create_supervised_evaluator``.
        val_loader: Loader both evaluators are run on.
        num_query: Forwarded as the first argument of every metric.

    Returns:
        tuple: ``(CC_cmc, CC_mAP, SS_cmc, SS_mAP)`` — cloth-changing results
        followed by standard results.

    Raises:
        ValueError: If ``cfg.TEST.RE_RANKING`` is neither ``'no'`` nor ``'yes'``
            (previously this printed a message and then failed on an unbound
            ``evaluator``).
    """
    device = cfg.MODEL.DEVICE
    logger = logging.getLogger("TEST clothing change re-id")
    logger.info("Enter inferencing")

    re_ranking = cfg.TEST.RE_RANKING
    if re_ranking == 'no':
        print("Create evaluator")
        longterm_cls = R1_mAP_longterm
    elif re_ranking == 'yes':
        print("Create evaluator for reranking")
        # NOTE(review): with re-ranking the 'r1_mAP_longterm' slot is filled by
        # R1_mAP_reranking and the standard evaluator is not re-ranked.
        longterm_cls = R1_mAP_reranking
    else:
        raise ValueError(
            "Unsupported re_ranking config. Only support for no or yes, but got {}."
            .format(re_ranking))

    cloth_evaluator = create_supervised_evaluator(
        model,
        metrics={
            'r1_mAP_longterm': longterm_cls(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
        },
        device=device)
    standard_evaluator = create_supervised_evaluator(
        model,
        metrics={
            'r1_mAP': R1_mAP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
        },
        device=device)

    cloth_evaluator.run(val_loader)
    CC_cmc, CC_mAP = cloth_evaluator.state.metrics['r1_mAP_longterm']
    logger.info('>>>>> TEST: Cloth changing evaluation results:')
    logger.info("mAP: {:.1%}".format(CC_mAP))
    for r in [1, 5, 10, 20, 50]:
        logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, CC_cmc[r - 1]))

    standard_evaluator.run(val_loader)
    SS_cmc, SS_mAP = standard_evaluator.state.metrics['r1_mAP']
    logger.info('>>>>> TEST: Standard evaluation results:')
    logger.info("mAP: {:.1%}".format(SS_mAP))
    for r in [1, 5, 10, 20, 50]:
        logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, SS_cmc[r - 1]))

    return CC_cmc, CC_mAP, SS_cmc, SS_mAP
def do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,  # modify for using self trained model
        loss_fn,
        num_query,
        start_epoch,  # add for using self trained model
        clustering_loader):
    """Train ``model`` while periodically re-clustering pseudo part labels.

    At the start of an epoch whose number satisfies
    ``epoch % cfg.CLUSTERING.PERIOD == 1`` and ``epoch <= cfg.CLUSTERING.STOP``,
    features are recomputed over ``clustering_loader``, clustered per identity,
    and (for the four known dataset names) the pseudo labels are scored with
    ``compute_IoU``. Validation runs every ``cfg.SOLVER.EVAL_PERIOD`` epochs and
    on every epoch after 110.

    Args:
        cfg: Config object; reads ``SOLVER.*`` periods and ``MAX_EPOCHS``,
            ``OUTPUT_DIR``, ``MODEL.DEVICE``, ``MODEL.NAME``,
            ``CLUSTERING.PERIOD`` / ``STOP`` / ``PART_NUM``, ``TEST.WITH_ARM``,
            ``TEST.FEAT_NORM`` and ``DATASETS.NAMES`` / ``ROOT_DIR`` /
            ``PSEUDO_LABEL_SUBDIR`` / ``PREDICTED_GT_SUBDIR``.
        model: Model being trained, clustered on and evaluated.
        train_loader: Training loader; its length and ``batch_size`` are used
            for logging.
        val_loader: Loader the evaluator is run on.
        optimizer: Optimizer passed to the trainer and saved in checkpoints.
        scheduler: Stepped once at the start of every epoch.
        loss_fn: Loss passed to the trainer.
        num_query: Forwarded as the first argument of the metric.
        start_epoch: Value assigned to ``engine.state.epoch`` when training starts.
        clustering_loader: Loader passed to ``compute_features``.
    """
    solver = cfg.SOLVER
    log_period = solver.LOG_PERIOD
    checkpoint_period = solver.CHECKPOINT_PERIOD
    eval_period = solver.EVAL_PERIOD
    output_dir = cfg.OUTPUT_DIR
    device = cfg.MODEL.DEVICE
    epochs = solver.MAX_EPOCHS
    clustering_period = cfg.CLUSTERING.PERIOD
    clustering_stop = cfg.CLUSTERING.STOP
    with_arm = cfg.TEST.WITH_ARM

    logger = logging.getLogger("reid_baseline.train")
    logger.info("Start training")

    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        loss_fn,
                                        device=device)

    # The metric class follows with_arm; the flag itself is forwarded either way.
    if with_arm:
        eval_metric = R1_mAP_arm(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
    else:
        eval_metric = R1_mAP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
    evaluator = create_supervised_evaluator(
        model,
        metrics={'r1_mAP': eval_metric},
        device=device,
        with_arm=with_arm)

    checkpointer = ModelCheckpoint(output_dir,
                                   cfg.MODEL.NAME,
                                   checkpoint_period,
                                   n_saved=10,
                                   require_empty=False)
    timer = Timer(average=True)

    to_save = {'model': model, 'optimizer': optimizer}
    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, to_save)
    timer.attach(trainer,
                 start=Events.EPOCH_STARTED,
                 resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 step=Events.ITERATION_COMPLETED)

    # Running averages read from the trainer output: index 0 -> loss, index 1 -> accuracy.
    RunningAverage(output_transform=lambda out: out[0]).attach(trainer, 'avg_loss')
    RunningAverage(output_transform=lambda out: out[1]).attach(trainer, 'avg_acc')

    # (dataset name, directory under DATASETS.ROOT_DIR) pairs with IoU scoring.
    iou_dataset_dirs = (
        ('market1501', 'Market-1501'),
        ('dukemtmc', 'DukeMTMC-reID'),
        ('cuhk03_np_labeled', 'cuhk03-np/labeled'),
        ('cuhk03_np_detected', 'cuhk03-np/detected'),
    )

    @trainer.on(Events.STARTED)
    def start_training(engine):
        engine.state.epoch = start_epoch

    @trainer.on(Events.EPOCH_STARTED)
    def adjust_learning_rate(engine):
        scheduler.step()

    @trainer.on(Events.EPOCH_STARTED)
    def adjust_mask_pseudo_labels(engine):
        epoch = engine.state.epoch
        due = epoch % clustering_period == 1 and epoch <= clustering_stop
        if not due:
            return

        torch.cuda.empty_cache()
        feats, pseudo_labels_paths, pids, shape = compute_features(
            clustering_loader, model, device, with_arm)
        torch.cuda.empty_cache()

        cluster_begin = time.time()
        logger.info('clustering and adjust pseudo-labels begin...')

        # Cluster the samples of each identity separately.
        for label in set(pids):
            members = [pos for pos, pid in enumerate(pids) if pid == label]
            member_feats = feats[members]
            member_paths = [pseudo_labels_paths[pos] for pos in members]
            cluster_for_each_identity(cfg, member_feats, member_paths, shape)

        logger.info(
            'mask adjust use time: {0:.0f} s'.format(time.time() -
                                                     cluster_begin))

        # evaluate the pseudo-part-labels
        for dataset_name, dataset_dir in iou_dataset_dirs:
            if cfg.DATASETS.NAMES == dataset_name:
                pred_dir = os.path.join(cfg.DATASETS.ROOT_DIR, dataset_dir,
                                        cfg.DATASETS.PSEUDO_LABEL_SUBDIR)
                gt_dir = os.path.join(cfg.DATASETS.ROOT_DIR, dataset_dir,
                                      cfg.DATASETS.PREDICTED_GT_SUBDIR)
                compute_IoU(pred_dir, gt_dir, cfg.CLUSTERING.PART_NUM)
                break

        torch.cuda.empty_cache()

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        # 1-based position inside the current epoch
        batch_idx = (engine.state.iteration - 1) % len(train_loader) + 1

        if batch_idx % log_period == 0:
            running = engine.state.metrics
            logger.info(
                "Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, Base Lr: {:.2e}"
                .format(engine.state.epoch, batch_idx, len(train_loader),
                        running['avg_loss'], running['avg_acc'],
                        scheduler.get_lr()[0]))

    # Handlers below are registered with the `trainer.on` decorator API.
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        logger.info(
            'Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]'
            .format(engine.state.epoch,
                    timer.value() * timer.step_count,
                    train_loader.batch_size / timer.value()))
        logger.info('-' * 10)
        timer.reset()

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        epoch = engine.state.epoch
        if not (epoch % eval_period == 0 or epoch > 110):
            return

        evaluator.run(val_loader)
        cmc, mAP = evaluator.state.metrics['r1_mAP']
        logger.info("Validation Results - Epoch: {}".format(
            engine.state.epoch))
        logger.info("mAP: {:.1%}".format(mAP))
        for rank in (1, 5, 10):
            logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(
                rank, cmc[rank - 1]))
        torch.cuda.empty_cache()

    trainer.run(train_loader, max_epochs=epochs)
def do_train(
        cfg,
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,
        loss_fn,
        num_query,
        start_epoch,
):
    """Run the ignite training loop with periodic validation.

    For the 'ours' / 'triplets' loss types the scheduler is not stepped;
    instead the learning rate is lowered through
    ``adjust_learning_rate_auto`` when the validation mAP stops improving.

    Args:
        cfg: configuration node; SOLVER.*, MODEL.*, TEST.FEAT_NORM and
            OUTPUT_DIR are read.
        model: network handed to the trainer, evaluator and checkpointer.
        train_loader: iterable of training batches (needs ``len`` and
            ``batch_size``).
        val_loader: loader the evaluator is run on.
        optimizer: optimizer; ``param_groups[0]['lr']`` is inspected.
        scheduler: LR scheduler (``step`` / ``get_lr``).
        loss_fn: loss callable forwarded to the trainer factory.
        num_query: number of query samples, forwarded to ``R1_mAP``.
        start_epoch: epoch counter value to resume from.
    """
    import math  # local import: only needed for tolerant LR comparison

    log_period = cfg.SOLVER.LOG_PERIOD
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    eval_period = cfg.SOLVER.EVAL_PERIOD
    output_dir = cfg.OUTPUT_DIR
    device = cfg.MODEL.DEVICE
    epochs = cfg.SOLVER.MAX_EPOCHS

    loss_type = cfg.MODEL.METRIC_LOSS_TYPE
    # Trainer output carries four extra proxy/similarity values for these.
    has_proxy_metrics = loss_type in ('ours', 'ours_triplet')
    # NOTE(review): the LR logic keys on 'triplets' while the metric logic
    # keys on 'ours_triplet' -- kept as in the original, confirm intent.
    plateau_driven_lr = loss_type in ('ours', 'triplets')

    logger = logging.getLogger("reid_baseline.train")
    logger.info("Start training")
    trainer = create_supervised_trainer(cfg,
                                        model,
                                        optimizer,
                                        loss_fn,
                                        device=device)
    evaluator = create_supervised_evaluator(
        model,
        metrics={
            'r1_mAP':
            R1_mAP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
        },
        device=device)
    checkpointer = ModelCheckpoint(output_dir,
                                   cfg.MODEL.NAME,
                                   checkpoint_period,
                                   n_saved=10,
                                   require_empty=False)
    timer = Timer(average=True)

    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict()
    })
    timer.attach(trainer,
                 start=Events.EPOCH_STARTED,
                 resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 step=Events.ITERATION_COMPLETED)

    RunningAverage(output_transform=lambda x: x[0]).attach(trainer, 'avg_loss')
    RunningAverage(output_transform=lambda x: x[1]).attach(trainer, 'avg_acc')
    if has_proxy_metrics:
        proxy_names = ('avg_proxypos', 'avg_proxyneg', 'avg_possim',
                       'avg_negsim')
        for pos, metric_name in enumerate(proxy_names, start=2):
            # Bind ``pos`` as a default to avoid the late-binding closure trap.
            RunningAverage(output_transform=lambda x, pos=pos: x[pos]).attach(
                trainer, metric_name)

    # Validation history: mAP values and the epochs they were measured at.
    map_list = []
    eval_epochs = []

    # (reference learning rate, evaluations to wait without improvement)
    tolerance_by_lr = (
        (3.5e-4, 3),
        (1e-4, 3),
        (7.0e-5, 3),
        (1.4e-5, 3),
        (3.5e-5, 6),
    )

    def _tolerance_for(lr):
        """Return the patience for ``lr``; 1000 when no reference matches."""
        for reference, patience in tolerance_by_lr:
            # Rescaled rates rarely equal the literal bit-for-bit, so compare
            # with a relative tolerance instead of ``==``.
            if math.isclose(lr, reference, rel_tol=1e-6):
                return patience
        return 1000

    @trainer.on(Events.STARTED)
    def start_training(engine):
        engine.state.epoch = start_epoch

    @trainer.on(Events.EPOCH_STARTED)
    def adjust_learning_rate(engine):
        if not plateau_driven_lr:
            scheduler.step()

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        batch_idx = (engine.state.iteration - 1) % len(train_loader) + 1
        if batch_idx % log_period != 0:
            return
        metrics = engine.state.metrics
        if has_proxy_metrics:
            logger.info(
                "Epoch[{}] Iteration[{}/{}] Loss: {:.3f}\tAcc: {:.3f}\nProxyPos: {:.3f}\tProxyNeg: {:.3f}\tPosSim {:.3f}\tNegSim {:.3f}\tBase Lr: {:.2e}"
                .format(engine.state.epoch, batch_idx, len(train_loader),
                        metrics['avg_loss'], metrics['avg_acc'],
                        metrics['avg_proxypos'], metrics['avg_proxyneg'],
                        metrics['avg_possim'], metrics['avg_negsim'],
                        optimizer.param_groups[0]['lr']))
        else:
            logger.info(
                "Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, Base Lr: {:.2e}"
                .format(engine.state.epoch, batch_idx, len(train_loader),
                        metrics['avg_loss'], metrics['avg_acc'],
                        scheduler.get_lr()[0]))

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        logger.info(
            'Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]'
            .format(engine.state.epoch,
                    timer.value() * timer.step_count,
                    train_loader.batch_size / timer.value()))
        logger.info('-' * 10)
        timer.reset()

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        if engine.state.epoch % eval_period != 0:
            return
        evaluator.run(val_loader)
        cmc, mAP = evaluator.state.metrics['r1_mAP']
        logger.info("Validation Results - Epoch: {}".format(
            engine.state.epoch))
        logger.info("mAP: {:.1%}".format(mAP))
        for r in [1, 5, 10]:
            logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))
        map_list.append(mAP)
        eval_epochs.append(engine.state.epoch)

        if plateau_driven_lr:
            tolerance = _tolerance_for(optimizer.param_groups[0]['lr'])
            # Lower the LR when none of the last ``tolerance`` evaluations
            # beat the best result seen before them.
            if len(map_list) > tolerance and max(
                    map_list[-tolerance:]) < max(map_list[:-tolerance]):
                adjust_learning_rate_auto(cfg.MODEL.ADJUST_LR, optimizer)

        best_map = max(map_list)
        logger.info('The max mAP is {:.1%}'.format(best_map))
        # Report the epoch of the best result, not its position in the list.
        logger.info('The max mAP is Epoch {}'.format(
            eval_epochs[map_list.index(best_map)]))

    trainer.run(train_loader, max_epochs=epochs)
def inference(cfg, model, val_loader, num_query, datasets):
    """Evaluate ``model`` on ``val_loader`` and log mAP and CMC ranks 1/5/10.

    Args:
        cfg: configuration node; MODEL.DEVICE/ALIGNED/PCB/NEW_PCB and
            TEST.RE_RANKING/ADJUST_RERANK/SAVE_DIST_*/MERGE/FEAT_NORM are read.
        model: network to evaluate.
        val_loader: loader the evaluator is run on.
        num_query: number of query samples, forwarded to the metric.
        datasets: forwarded unchanged to the metric constructor.

    Raises:
        ValueError: if ``cfg.TEST.RE_RANKING`` is neither 'no' nor 'yes'.
    """
    device = cfg.MODEL.DEVICE
    aligned_test = cfg.MODEL.ALIGNED
    adjust_rerank = cfg.TEST.ADJUST_RERANK
    pcb_test = cfg.MODEL.PCB
    # Order expected downstream: gallery-gallery, query-query, query-gallery.
    savedist_path = [
        cfg.TEST.SAVE_DIST_GG,
        cfg.TEST.SAVE_DIST_QQ,
        cfg.TEST.SAVE_DIST_QG,
    ]
    merge = cfg.TEST.MERGE
    new_pcb_test = cfg.MODEL.NEW_PCB

    logger = logging.getLogger("reid_baseline.inference")
    logger.info("Enter inferencing")

    if cfg.TEST.RE_RANKING == 'no':
        print("Create evaluator")
        metric = R1_mAP(num_query,
                        aligned_test,
                        datasets,
                        max_rank=50,
                        feat_norm=cfg.TEST.FEAT_NORM)
    elif cfg.TEST.RE_RANKING == 'yes':
        print("Create evaluator for reranking")
        metric = R1_mAP_reranking(num_query,
                                  datasets,
                                  aligned_test,
                                  pcb_test,
                                  new_pcb_test,
                                  adjust_rerank,
                                  savedist_path,
                                  merge,
                                  max_rank=50,
                                  feat_norm=cfg.TEST.FEAT_NORM)
    else:
        # Previously this only printed and then failed with an unrelated
        # UnboundLocalError on ``evaluator``; fail with the real reason.
        raise ValueError(
            "Unsupported re_ranking config. Only support for no or yes, but got {}."
            .format(cfg.TEST.RE_RANKING))

    evaluator = create_supervised_evaluator(model,
                                            aligned_test,
                                            pcb_test,
                                            new_pcb_test,
                                            metrics={'r1_mAP': metric},
                                            device=device)

    evaluator.run(val_loader)
    cmc, mAP = evaluator.state.metrics['r1_mAP']
    logger.info('Validation Results')
    logger.info("mAP: {:.1%}".format(mAP))
    for r in [1, 5, 10]:
        logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))
def do_train(cfg, model, train_loader, val_loader, optimizer, scheduler,
             loss_fn, num_query, start_epoch, device_id,
             train_camstyle_loader):
    """Train with paired regular and camstyle batches.

    Every iteration the trainer receives ``[batch, camstyle_batch]``, drawn
    from two endlessly repeating loaders; the engine itself only iterates over
    a dummy index list of ``len(train_loader)`` items. Checkpoints are
    registered only when ``device_id == 0``.
    """
    solver = cfg.SOLVER
    log_period = solver.LOG_PERIOD
    checkpoint_period = solver.CHECKPOINT_PERIOD
    eval_period = solver.EVAL_PERIOD
    output_dir = cfg.OUTPUT_DIR
    epochs = solver.MAX_EPOCHS
    device = cfg.MODEL.DEVICE

    logger = logging.getLogger("reid_baseline.train")
    logger.info("Start training")

    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        loss_fn,
                                        device=device,
                                        device_id=device_id)
    rank_metric = R1_mAP(num_query,
                         True,
                         False,
                         max_rank=50,
                         feat_norm=cfg.TEST.FEAT_NORM)
    evaluator = create_supervised_evaluator(model,
                                            metrics={'r1_mAP': rank_metric},
                                            device=device,
                                            device_id=device_id)

    if device_id == 0:
        checkpointer = ModelCheckpoint(output_dir,
                                       cfg.MODEL.NAME,
                                       checkpoint_period,
                                       n_saved=10,
                                       require_empty=False)
        to_save = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer,
                                  to_save)

    timer = Timer(average=True)
    timer.attach(trainer,
                 start=Events.EPOCH_STARTED,
                 resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 step=Events.ITERATION_COMPLETED)

    # Running averages over the trainer output tuple.
    RunningAverage(output_transform=lambda x: x[0]).attach(trainer, 'avg_loss')
    RunningAverage(output_transform=lambda x: x[1]).attach(trainer, 'avg_acc')
    RunningAverage(output_transform=lambda x: x[2]).attach(
        trainer, 'data_ratio')

    @trainer.on(Events.STARTED)
    def start_training(engine):
        engine.state.epoch = start_epoch

    @trainer.on(Events.EPOCH_STARTED)
    def adjust_learning_rate(engine):
        scheduler.step()

    def cycle(iterable):
        # Re-iterate the loader itself on every pass (no caching of batches).
        while True:
            yield from iterable

    train_loader_iter = cycle(train_loader)
    train_camstyle_loader_iter = cycle(train_camstyle_loader)

    @trainer.on(Events.ITERATION_STARTED)
    def generate_batch(engine):
        regular = next(train_loader_iter)
        camstyle = next(train_camstyle_loader_iter)
        engine.state.batch = [regular, camstyle]

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        step_in_epoch = (engine.state.iteration - 1) % len(train_loader) + 1
        if step_in_epoch % log_period != 0:
            return
        stats = engine.state.metrics
        logger.info(
            "Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, ratio of data/cam_data: {:.3f}, Base Lr: {:.2e}"
            .format(engine.state.epoch, step_in_epoch, len(train_loader),
                    stats['avg_loss'], stats['avg_acc'], stats['data_ratio'],
                    scheduler.get_lr()[0]))

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        per_batch = timer.value() * timer.step_count
        throughput = train_loader.batch_size / timer.value()
        logger.info(
            'Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]'
            .format(engine.state.epoch, per_batch, throughput))
        logger.info('-' * 10)
        timer.reset()

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        if engine.state.epoch % eval_period != 0:
            return
        evaluator.run(val_loader)
        cmc, mAP = evaluator.state.metrics['r1_mAP']
        logger.info("Validation Results - Epoch: {}".format(
            engine.state.epoch))
        logger.info("mAP: {:.1%}".format(mAP))
        for rank in (1, 5, 10):
            logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(
                rank, cmc[rank - 1]))

    # The engine iterates over placeholder indices; real batches are injected
    # by ``generate_batch``.
    placeholder_data = list(range(len(train_loader)))
    trainer.run(placeholder_data, max_epochs=epochs)
def do_train(cfg, model, train_loader, val_loader, optimizer, scheduler,
             loss_fn, num_query, start_epoch, image_map_label2, num_classes2):
    """Train with label-smoothing criteria and track the best validation mAP.

    Builds the loss objects and the same-identity mask matrix, runs the ignite
    training loop and appends validation results to
    ``<cfg.OUTPUT_DIR>/map_cmc.txt``. The module globals ``mAP_path``,
    ``model_dir``, ``ITER``, ``best_mAP``, ``best_epoch`` and ``save_flag``
    are read and/or written.

    Args:
        cfg: configuration node (SOLVER.*, MODEL.*, TEST.FEAT_NORM, OUTPUT_DIR).
        model: network to train.
        train_loader: training loader (needs ``len`` and ``batch_size``).
        val_loader: loader the evaluator is run on.
        optimizer: optimizer handed to trainer and checkpointer.
        scheduler: LR scheduler (``step`` / ``get_lr``).
        loss_fn: loss callable forwarded to the trainer factory.
        num_query: number of query samples, forwarded to ``R1_mAP``.
        start_epoch: epoch counter value to resume from.
        image_map_label2: passed to ``get_same_id_list``.
        num_classes2: side length of the square mask matrix.
    """
    # ---------------------- LOSS start-----------------------------
    print('----------Initialize Loss Start...')
    criterion = torch.nn.CrossEntropyLoss()
    criterion_lsr = LSR()
    criterion_mse = torch.nn.MSELoss()
    criterion_lsr_direction = LSR_direction()
    criterion_adaptive_lsr = AdaptiveLSR(0.25)
    criterion_lsr.set_epsilon(0.1)
    criterion_lsr_direction.set_alpha(0.6)
    criterion_lsr_direction.set_beta(0.15)
    print('******\nalpha:', criterion_lsr_direction.alpha, ' beta:',
          criterion_lsr_direction.beta)

    same_id_list = get_same_id_list(image_map_label2)
    criterion_lsr_direction.set_mask(same_id_list, num_classes2)

    # Weights written for groups of two and of three linked identities.
    pair_weight = 1
    triple_weight = 1 / 3
    mask_tensor_matrix = torch.zeros(num_classes2, num_classes2)
    for ids_item in same_id_list:
        if len(ids_item) == 2:
            mask_tensor_matrix[ids_item[0], ids_item[1]] = pair_weight
        elif len(ids_item) == 3:
            mask_tensor_matrix[ids_item[0], ids_item[1]] = triple_weight
            mask_tensor_matrix[ids_item[0], ids_item[2]] = triple_weight
            mask_tensor_matrix[ids_item[1], ids_item[2]] = triple_weight
    mask_tensor_matrix = mask_tensor_matrix.float()
    print('mask_tensor_matrix.shape:', mask_tensor_matrix.shape,
          type(mask_tensor_matrix), '\n\n\n')
    print('----------Initialize Loss End!!!')
    # ---------------------------------------------------------

    global mAP_path, model_dir
    mAP_path = osp.join(cfg.OUTPUT_DIR, 'map_cmc.txt')
    model_dir = cfg.OUTPUT_DIR
    # Make sure the results file exists without leaving a handle open.
    with open(mAP_path, 'a+'):
        pass

    log_period = cfg.SOLVER.LOG_PERIOD
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    eval_period = cfg.SOLVER.EVAL_PERIOD
    output_dir = cfg.OUTPUT_DIR
    device = cfg.MODEL.DEVICE
    epochs = cfg.SOLVER.MAX_EPOCHS

    logger = logging.getLogger("reid_baseline.train")
    logger.info("Start training")
    trainer = create_supervised_trainer(
        model, optimizer, loss_fn, criterion, criterion_mse, criterion_lsr,
        criterion_adaptive_lsr, criterion_lsr_direction, mask_tensor_matrix,
        device, cfg.SOLVER.MIXUP, cfg.SOLVER.RICAP, cfg.MODEL.FREEZE_BASE,
        cfg.MODEL.FREEZE_BASE_EPOCH)
    evaluator = create_supervised_evaluator(
        model,
        metrics={
            'r1_mAP':
            R1_mAP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
        },
        device=device)
    checkpointer = ModelCheckpoint(output_dir,
                                   cfg.MODEL.NAME,
                                   checkpoint_period,
                                   n_saved=3,
                                   require_empty=False)
    timer = Timer(average=True)

    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {
        'model': model,
        'optimizer': optimizer
    })
    timer.attach(trainer,
                 start=Events.EPOCH_STARTED,
                 resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 step=Events.ITERATION_COMPLETED)

    # average metric to attach on trainer
    RunningAverage(output_transform=lambda x: x[0]).attach(trainer, 'avg_loss')
    RunningAverage(output_transform=lambda x: x[1]).attach(trainer, 'avg_acc')

    @trainer.on(Events.STARTED)
    def start_training(engine):
        engine.state.epoch = start_epoch

    @trainer.on(Events.EPOCH_STARTED)
    def adjust_learning_rate(engine):
        # During the custom warm-up epochs the scheduler is left untouched.
        if (cfg.SOLVER.MY_WARMUP == 'yes'
                and engine.state.epoch <= cfg.SOLVER.MY_WARMUP_EPOCH):
            print('--- warmup')
        else:
            scheduler.step()

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        global ITER
        ITER += 1

        if ITER % log_period == 0:
            if cfg.SOLVER.MY_SCHEDULER == 'yes':
                logger.info(
                    "Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}".
                    format(engine.state.epoch, ITER, len(train_loader),
                           engine.state.metrics['avg_loss'],
                           engine.state.metrics['avg_acc']))
            else:
                logger.info(
                    "Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, Base Lr: {:.2e}"
                    .format(engine.state.epoch, ITER, len(train_loader),
                            engine.state.metrics['avg_loss'],
                            engine.state.metrics['avg_acc'],
                            scheduler.get_lr()[0]))
        if len(train_loader) == ITER:
            ITER = 0

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        logger.info(
            'Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.2f}[samples/s]'
            .format(engine.state.epoch,
                    timer.value() * timer.step_count,
                    train_loader.batch_size / timer.value()))
        logger.info('-' * 10)
        timer.reset()

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        global best_mAP, best_epoch, mAP_path, save_flag
        if engine.state.epoch % eval_period != 0:
            return

        evaluator.run(val_loader)
        cmc, mAP = evaluator.state.metrics['r1_mAP']
        logger.info("Validation Results - Epoch: {}".format(
            engine.state.epoch))
        logger.info("[Epoch {}] mAP: {:.2%}".format(engine.state.epoch, mAP))
        for r in [1, 5, 10, 20]:
            logger.info("CMC curve, Rank-{:<3}:{:.2%}".format(r, cmc[r - 1]))

        if float(mAP) > float(best_mAP):
            print('+++ get best_mAP: ', best_mAP, '-->', mAP)
            best_mAP = mAP
            best_epoch = int(engine.state.epoch)
            save_flag = True
            print(' set save_flag: True')

        # ``with`` guarantees the handle is closed even if a write fails.
        with open(mAP_path, 'a+') as map_cmc_txt:
            map_cmc_txt.write(
                "Epoch[{}] best_mAP: {:.2f} best_epoch: {} \n".format(
                    engine.state.epoch, best_mAP * 100, best_epoch))
            map_cmc_txt.write(
                " mAP: {:.2f} Rank-1: {:.2f} Rank-5: {:.2f} Rank-10: {:.2f} Rank-20: {:.2f}\n"
                .format(
                    float(mAP) * 100, cmc[0] * 100, cmc[4] * 100,
                    cmc[9] * 100, cmc[19] * 100))
            # Force the results to disk so they survive an abrupt stop.
            map_cmc_txt.flush()
            os.fsync(map_cmc_txt.fileno())

    trainer.run(train_loader, max_epochs=epochs)
def do_mt_train(cfg, model, train_loader, val_loader, optimizer, scheduler,
                loss_fn, num_query, start_epoch, weight):
    """Run the multi-task ignite training loop with periodic validation.

    The trainer output is read as a 10-tuple; each position is tracked as a
    running average under the names in ``tracked_outputs`` below and logged
    every ``cfg.SOLVER.LOG_PERIOD`` iterations. The module global ``ITER`` is
    used as the per-epoch iteration counter.

    Args:
        cfg: configuration node (SOLVER.*, MODEL.*, TEST.FEAT_NORM, OUTPUT_DIR).
        model: network to train.
        train_loader: training loader (needs ``len`` and ``batch_size``).
        val_loader: loader the evaluator is run on.
        optimizer: optimizer handed to trainer and checkpointer.
        scheduler: LR scheduler (``step`` / ``get_lr``).
        loss_fn: loss callable forwarded to the trainer factory.
        num_query: number of query samples, forwarded to ``R1_mAP``.
        start_epoch: epoch counter value to resume from.
        weight: forwarded unchanged to ``create_mt_supervised_trainer``.
    """
    log_period = cfg.SOLVER.LOG_PERIOD
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    eval_period = cfg.SOLVER.EVAL_PERIOD
    output_dir = cfg.OUTPUT_DIR
    device = cfg.MODEL.DEVICE
    epochs = cfg.SOLVER.MAX_EPOCHS

    logger = logging.getLogger("reid_mt.train")
    logger.info("Start training")
    trainer = create_mt_supervised_trainer(model,
                                           optimizer,
                                           loss_fn,
                                           weight,
                                           device=device)
    evaluator = create_mt_supervised_evaluator(
        model,
        metrics={
            'r1_mAP':
            R1_mAP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
        },
        device=device)
    checkpointer = ModelCheckpoint(output_dir,
                                   cfg.MODEL.NAME,
                                   checkpoint_period,
                                   n_saved=20,
                                   require_empty=False,
                                   save_as_state_dict=True)
    timer = Timer(average=True)

    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {
        'model': model,
        'optimizer': optimizer
    })
    timer.attach(trainer,
                 start=Events.EPOCH_STARTED,
                 resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 step=Events.ITERATION_COMPLETED)

    # Position in the trainer output tuple -> running-average metric name.
    tracked_outputs = (
        'avg_loss',
        'avg_loss_seg',
        'avg_loss_attr',
        'avg_loss_parts',
        'avg_loss_glb',
        'avg_loss_anchor',
        'avg_loss_mask',
        'avg_loss_triplet',
        'avg_loss_dv',
        'avg_acc',
    )
    for pos, metric_name in enumerate(tracked_outputs):
        # Bind ``pos`` as a default to avoid the late-binding closure trap.
        RunningAverage(output_transform=lambda x, pos=pos: x[pos]).attach(
            trainer, metric_name)

    @trainer.on(Events.STARTED)
    def start_training(engine):
        engine.state.epoch = start_epoch

    @trainer.on(Events.EPOCH_STARTED)
    def adjust_learning_rate(engine):
        scheduler.step()

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        global ITER
        ITER += 1

        if ITER % log_period == 0:
            stats = engine.state.metrics
            # Built from adjacent literals so no source indentation leaks into
            # the message (the old backslash continuation sat inside the
            # string literal).
            logger.info(
                ("Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Glb: {:.3f} "
                 "Mask: {:.3f} Anchor: {:.3f} Parts: {:.3f} "
                 "Triplet: {:.3f} Seg: {:.3f} Attr: {:.3f} Div: {:.3f} "
                 "Acc: {:.3f}, Base Lr: {:.2e}").format(
                     engine.state.epoch, ITER, len(train_loader),
                     stats['avg_loss'], stats['avg_loss_glb'],
                     stats['avg_loss_mask'], stats['avg_loss_anchor'],
                     stats['avg_loss_parts'], stats['avg_loss_triplet'],
                     stats['avg_loss_seg'], stats['avg_loss_attr'],
                     stats['avg_loss_dv'], stats['avg_acc'],
                     scheduler.get_lr()[0]))
        if len(train_loader) == ITER:
            ITER = 0

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        logger.info(
            'Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]'
            .format(engine.state.epoch,
                    timer.value() * timer.step_count,
                    train_loader.batch_size / timer.value()))
        logger.info('-' * 10)
        timer.reset()

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        if engine.state.epoch % eval_period == 0:
            evaluator.run(val_loader)
            cmc, mAP = evaluator.state.metrics['r1_mAP']
            logger.info("Validation Results - Epoch: {}".format(
                engine.state.epoch))
            logger.info("mAP: {:.1%}".format(mAP))
            for r in [1, 5, 10]:
                logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(
                    r, cmc[r - 1]))

    print('Start iteration')
    trainer.run(train_loader, max_epochs=epochs)
def do_train_with_center2(
        cfg,
        model,
        center_criterion,
        cluster_criterion,
        train_loader,
        val_loader,
        target_train_loader,
        optimizer,
        optimizer_center,
        optimizer_cluster,
        scheduler,
        loss_fn,
        loss_cluster_fn,
        num_query,
        start_epoch,
        my_start_epoch,
        target_val_loader=None,
):
    """Train with center and cluster criteria, validating periodically.

    Args:
        cfg: configuration node (SOLVER.*, MODEL.*, TEST.FEAT_NORM, OUTPUT_DIR).
        model: network to train.
        center_criterion: center-loss module; also checkpointed.
        cluster_criterion: cluster-loss module; also checkpointed.
        train_loader: source training loader (needs ``len``, ``batch_size``).
        val_loader: source validation loader.
        target_train_loader: forwarded to the trainer factory.
        optimizer: main optimizer.
        optimizer_center: optimizer for the center criterion.
        optimizer_cluster: optimizer for the cluster criterion.
        scheduler: LR scheduler (``step`` / ``get_lr``).
        loss_fn: loss callable forwarded to the trainer factory.
        loss_cluster_fn: cluster loss callable forwarded to the trainer factory.
        num_query: number of query samples, forwarded to ``R1_mAP``.
        start_epoch: epoch counter value to resume from.
        my_start_epoch: forwarded unchanged to the trainer factory.
        target_val_loader: optional target validation loader. The validation
            handler used this name although it was never a parameter; it is
            now an optional argument and target validation is skipped when it
            is ``None``.
    """
    log_period = cfg.SOLVER.LOG_PERIOD
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    eval_period = cfg.SOLVER.EVAL_PERIOD
    output_dir = cfg.OUTPUT_DIR
    device = cfg.MODEL.DEVICE
    epochs = cfg.SOLVER.MAX_EPOCHS

    logger = logging.getLogger("reid_baseline.train")
    logger.info("Start training")
    trainer = create_supervised_trainer_with_center(
        model,
        center_criterion,
        cluster_criterion,
        optimizer,
        optimizer_center,
        optimizer_cluster,
        loss_fn,
        loss_cluster_fn,
        cfg.SOLVER.CENTER_LOSS_WEIGHT,
        cfg.SOLVER.CLUSTER_LOSS_WEIGHT,
        target_train_loader,
        logger,
        my_start_epoch,
        device=device)
    evaluator = create_supervised_evaluator(
        model,
        metrics={
            'r1_mAP':
            R1_mAP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
        },
        device=device)
    checkpointer = ModelCheckpoint(output_dir,
                                   cfg.MODEL.NAME,
                                   checkpoint_period,
                                   n_saved=10,
                                   require_empty=False)
    timer = Timer(average=True)

    trainer.add_event_handler(
        Events.EPOCH_COMPLETED, checkpointer, {
            'model': model,
            'optimizer': optimizer,
            'center_param': center_criterion,
            'optimizer_center': optimizer_center,
            'optimizer_cluster': optimizer_cluster,
            'cluster_param': cluster_criterion
        })
    timer.attach(trainer,
                 start=Events.EPOCH_STARTED,
                 resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 step=Events.ITERATION_COMPLETED)

    # average metric to attach on trainer
    RunningAverage(output_transform=lambda x: x[0]).attach(trainer, 'avg_loss')
    RunningAverage(output_transform=lambda x: x[1]).attach(trainer, 'avg_acc')

    def _validate(title, loader, epoch):
        """Run the evaluator on ``loader`` and log mAP and CMC ranks 1/5/10."""
        evaluator.run(loader)
        cmc, mAP = evaluator.state.metrics['r1_mAP']
        logger.info("{} Validation Results - Epoch: {}".format(title, epoch))
        logger.info("mAP: {:.1%}".format(mAP))
        for r in [1, 5, 10]:
            logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))

    @trainer.on(Events.STARTED)
    def start_training(engine):
        engine.state.epoch = start_epoch

    @trainer.on(Events.EPOCH_STARTED)
    def adjust_learning_rate(engine):
        scheduler.step()

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        global ITER
        ITER += 1

        if ITER % log_period == 0:
            logger.info(
                "Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, Base Lr: {:.2e}"
                .format(engine.state.epoch, ITER, len(train_loader),
                        engine.state.metrics['avg_loss'],
                        engine.state.metrics['avg_acc'],
                        scheduler.get_lr()[0]))
        if len(train_loader) == ITER:
            ITER = 0

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        logger.info(
            'Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]'
            .format(engine.state.epoch,
                    timer.value() * timer.step_count,
                    train_loader.batch_size / timer.value()))
        logger.info('-' * 10)
        timer.reset()

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        if engine.state.epoch % eval_period != 0:
            return
        _validate("Source", val_loader, engine.state.epoch)
        # Target-domain validation only runs when a loader was supplied.
        if target_val_loader is not None:
            _validate("Target", target_val_loader, engine.state.epoch)

    trainer.run(train_loader, max_epochs=epochs)
def inference(
        cfg,
        model,
        val_loader,
        num_query,
        query_list_path='/home/flyingbird/Documents/reid_competition/test/query_a_list.txt',
        gallery_list_path='/home/flyingbird/Documents/reid_competition/test/gallery_a_list.txt',
        save_path='/home/flyingbird/Documents/reid_competition/test/rerank_submission_A.json',
):
    """Evaluate ``model`` and export per-query gallery rankings as JSON.

    The metric result is unpacked as
    ``(cmc, mAP, max_200_indices, num_q, num_g)``. For each of the first
    ``num_q`` query file names the gallery file names selected by
    ``max_200_indices`` are written to ``save_path`` as a
    ``{query_name: [gallery_name, ...]}`` mapping.

    Args:
        cfg: configuration node (MODEL.DEVICE, TEST.RE_RANKING, TEST.FEAT_NORM).
        model: network to evaluate.
        val_loader: loader the evaluator is run on.
        num_query: number of query samples, forwarded to the metric.
        query_list_path: text file listing query images, one per line, with
            the image path in the first space-separated field.
        gallery_list_path: same format, for gallery images.
        save_path: destination of the JSON submission file.

    Raises:
        ValueError: if ``cfg.TEST.RE_RANKING`` is neither 'no' nor 'yes'.
    """

    def _read_image_names(list_path):
        """Return the file name (second '/'-component) of each listed image."""
        names = []
        with open(list_path, 'r') as f:  # list file shipped with the test set
            for line in f:
                if not line.strip():
                    # A blank line used to crash the parser with IndexError.
                    continue
                image_path = line.split(" ")[0]
                names.append(image_path.split("/")[1])
        return names

    device = cfg.MODEL.DEVICE

    logger = logging.getLogger("reid_baseline.inference")
    logger.info("Enter inferencing")
    if cfg.TEST.RE_RANKING == 'no':
        print("Create evaluator")
        metric = R1_mAP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
    elif cfg.TEST.RE_RANKING == 'yes':
        print("Create evaluator for reranking")
        metric = R1_mAP_reranking(num_query,
                                  max_rank=50,
                                  feat_norm=cfg.TEST.FEAT_NORM)
    else:
        # Previously this only printed and then failed with an unrelated
        # UnboundLocalError on ``evaluator``; fail with the real reason.
        raise ValueError(
            "Unsupported re_ranking config. Only support for no or yes, but got {}."
            .format(cfg.TEST.RE_RANKING))
    evaluator = create_supervised_evaluator(model,
                                            metrics={'r1_mAP': metric},
                                            device=device)

    evaluator.run(val_loader)
    _cmc, mAP, max_200_indices, num_q, _num_g = evaluator.state.metrics[
        'r1_mAP']

    # Map ranked indices back to file names.
    query_list = _read_image_names(query_list_path)
    gallery_list = _read_image_names(gallery_list_path)

    res_dict = {
        query_list[q_idx]: [gallery_list[i] for i in max_200_indices[q_idx]]
        for q_idx in range(num_q)
    }

    with open(save_path, 'w', encoding='utf-8') as f:  # submission file
        json.dump(res_dict, f)

    logger.info('Validation Results')
    logger.info("mAP: {:.1%}".format(mAP))
def do_train(cfg, model, train_loader, val_loader, optimizer, scheduler,
             loss_fn, num_query, start_epoch):
    """Train the model and validate one query at a time.

    From epoch ``cfg.SOLVER.EVAL_PERIOD`` onwards, every completed epoch
    builds one validation loader per query index through
    ``make_data_loader_val``, runs the evaluator on it and averages the
    per-query CMC curves and APs.

    Args:
        cfg: configuration node (SOLVER.*, MODEL.*, TEST.*, DATASETS.*,
            OUTPUT_DIR).
        model: network to train.
        train_loader: training loader (needs ``len``).
        val_loader: unused by the validation handler, which builds its own
            per-query loaders.
        optimizer: optimizer handed to trainer and checkpointer.
        scheduler: LR scheduler (``step`` / ``get_lr``).
        loss_fn: loss callable forwarded to the trainer factory.
        num_query: number of query indices to iterate during validation.
        start_epoch: epoch counter value to resume from.

    Raises:
        ValueError: if ``cfg.TEST.PAIR`` is neither "no" nor "yes".
    """
    log_period = cfg.SOLVER.LOG_PERIOD
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    eval_period = cfg.SOLVER.EVAL_PERIOD
    output_dir = cfg.OUTPUT_DIR
    device = cfg.MODEL.DEVICE
    epochs = cfg.SOLVER.MAX_EPOCHS

    logger = logging.getLogger("reid_baseline.train")
    logger.info("Start training")
    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        loss_fn,
                                        device=device,
                                        gamma=cfg.MODEL.GAMMA,
                                        margin=cfg.SOLVER.MARGIN,
                                        beta=cfg.MODEL.BETA)

    if cfg.TEST.PAIR == "no":
        metric_cls = R1_mAP
    elif cfg.TEST.PAIR == "yes":
        metric_cls = R1_mAP_pair
    else:
        # Without this the evaluator was left undefined and the first
        # validation pass failed with a NameError far from the cause.
        raise ValueError(
            "Unsupported TEST.PAIR config. Only support for no or yes, "
            "but got {}.".format(cfg.TEST.PAIR))
    # The metric is built for a single query: validation runs per query.
    evaluator = create_supervised_evaluator(
        model,
        metrics={
            'r1_mAP': metric_cls(1, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
        },
        device=device)

    checkpointer = ModelCheckpoint(output_dir,
                                   cfg.MODEL.NAME,
                                   checkpoint_period,
                                   n_saved=10,
                                   require_empty=False)
    timer = Timer(average=True)

    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {
        'model': model,
        'optimizer': optimizer
    })
    timer.attach(trainer,
                 start=Events.EPOCH_STARTED,
                 resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 step=Events.ITERATION_COMPLETED)

    # average metric to attach on trainer
    RunningAverage(output_transform=lambda x: x[0]).attach(trainer, 'avg_loss')
    RunningAverage(output_transform=lambda x: x[1]).attach(trainer, 'avg_acc')

    dataset = init_dataset(cfg.DATASETS.NAMES, root=cfg.DATASETS.ROOT_DIR)

    @trainer.on(Events.STARTED)
    def start_training(engine):
        engine.state.epoch = start_epoch

    @trainer.on(Events.EPOCH_STARTED)
    def adjust_learning_rate(engine):
        scheduler.step()

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        global ITER
        ITER += 1

        if ITER % log_period == 0:
            logger.info(
                "Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, Base Lr: {:.2e}"
                .format(engine.state.epoch, ITER, len(train_loader),
                        engine.state.metrics['avg_loss'],
                        engine.state.metrics['avg_acc'],
                        scheduler.get_lr()[0]))
        if len(train_loader) == ITER:
            ITER = 0

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        # NOTE(review): the loaders are rebuilt every epoch and only the new
        # train loader's batch_size is used here; the call is kept in case
        # make_data_loader_train has side effects -- confirm whether needed.
        rebuilt_train_loader, _, _, _ = make_data_loader_train(cfg)
        logger.info(
            'Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]'
            .format(engine.state.epoch,
                    timer.value() * timer.step_count,
                    rebuilt_train_loader.batch_size / timer.value()))
        logger.info('-' * 10)
        timer.reset()

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        if engine.state.epoch < eval_period:
            return

        all_cmc = []
        all_AP = []
        for query_index in tqdm(range(num_query)):
            query_loader = make_data_loader_val(cfg, query_index, dataset)
            evaluator.run(query_loader)
            cmc, AP, _q_pid = evaluator.state.metrics['r1_mAP']
            # Skip queries flagged invalid (AP not >= 0, which also covers
            # NaN) and those with a CMC curve shorter than 50 ranks.
            if not AP >= 0 or cmc.shape[0] < 50:
                continue
            all_cmc.append(cmc)
            all_AP.append(AP)

        if not all_cmc:
            # Averaging nothing used to yield NaN and then an IndexError.
            logger.warning(
                "Validation Results - Epoch: {}: no valid query".format(
                    engine.state.epoch))
            return

        cmc = np.asarray(all_cmc).astype(np.float32).sum(0) / len(all_cmc)
        mAP = np.mean(all_AP)
        logger.info("Validation Results - Epoch: {}".format(
            engine.state.epoch))
        logger.info("mAP: {:.1%}".format(mAP))
        for r in [1, 5, 10]:
            logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))

    trainer.run(train_loader, max_epochs=epochs)
def do_train(cfg, model, train_loader, val_loader, optimizer, scheduler,
             loss_fn, num_query, start_epoch):
    """Run the ignite training loop, validating periodically and late on.

    Validation runs every ``cfg.SOLVER.EVAL_PERIOD`` epochs and on every epoch
    after epoch 120. The engine's iteration counter is reset to zero at the
    start of each epoch.
    """
    solver = cfg.SOLVER
    log_period = solver.LOG_PERIOD
    checkpoint_period = solver.CHECKPOINT_PERIOD
    eval_period = solver.EVAL_PERIOD
    output_dir = cfg.OUTPUT_DIR
    device = cfg.MODEL.DEVICE
    epochs = solver.MAX_EPOCHS

    logger = logging.getLogger("reid_baseline.train")
    logger.info("Start training")

    trainer = create_supervised_trainer(model,
                                        optimizer,
                                        loss_fn,
                                        cfg=cfg,
                                        device=device)
    rank_metric = R1_mAP(num_query, max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)
    evaluator = create_supervised_evaluator(model,
                                            metrics={'r1_mAP': rank_metric},
                                            device=device)
    checkpointer = ModelCheckpoint(output_dir,
                                   cfg.MODEL.NAME,
                                   checkpoint_period,
                                   n_saved=epochs,
                                   require_empty=False,
                                   start_iter=start_epoch)
    timer = Timer(average=True)

    to_save = {'model': model, 'optimizer': optimizer}
    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, to_save)
    timer.attach(trainer,
                 start=Events.EPOCH_STARTED,
                 resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 step=Events.ITERATION_COMPLETED)

    # Running averages over the first two entries of the trainer output.
    RunningAverage(output_transform=lambda x: x[0]).attach(trainer, 'avg_loss')
    RunningAverage(output_transform=lambda x: x[1]).attach(trainer, 'avg_acc')

    @trainer.on(Events.STARTED)
    def start_training(engine):
        state = engine.state
        state.epoch = start_epoch
        state.total_iteration = 0

    @trainer.on(Events.EPOCH_STARTED)
    def adjust_learning_rate(engine):
        scheduler.step()
        engine.state.iteration = 0

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        step_in_epoch = (engine.state.iteration - 1) % len(train_loader) + 1
        if step_in_epoch % log_period != 0:
            return
        stats = engine.state.metrics
        logger.info(
            "Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, Base Lr: {:.2e}"
            .format(engine.state.epoch, step_in_epoch, len(train_loader),
                    stats['avg_loss'], stats['avg_acc'],
                    scheduler.get_lr()[0]))

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        per_batch = timer.value() * timer.step_count
        throughput = train_loader.batch_size / timer.value()
        logger.info(
            'Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]'
            .format(engine.state.epoch, per_batch, throughput))
        logger.info('-' * 10)
        timer.reset()

    @evaluator.on(Events.ITERATION_COMPLETED)
    def log_evaluate_extract_features(engine):
        step_in_eval = (engine.state.iteration - 1) % len(val_loader) + 1
        if step_in_eval % log_period == 0:
            logger.info("Extract Features Iteration[{}/{}]".format(
                step_in_eval, len(val_loader)))

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        epoch = engine.state.epoch
        if not (epoch % eval_period == 0 or epoch > 120):
            return
        evaluator.run(val_loader)
        cmc, mAP = evaluator.state.metrics['r1_mAP']
        logger.info("Validation Results - Epoch: {}".format(
            engine.state.epoch))
        logger.info("mAP: {:.1%}".format(mAP))
        for rank in (1, 5, 10):
            logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(
                rank, cmc[rank - 1]))

    trainer.run(train_loader, max_epochs=epochs)
def do_train(cfg, model, train_loader, val_loader, optimizer, scheduler,
             loss_fn, num_query, start_epoch, num_classes):
    """Run the training loop, optionally with the multi-camera trainer.

    Builds a per-camera identity association table, creates the trainer
    (multi-camera when ``cfg.DATALOADER.SAMPLER == 'softmax_multi_camera'``,
    plain supervised otherwise), registers checkpointing, timing,
    running-average and logging handlers, then runs the trainer over
    ``train_loader`` for ``cfg.SOLVER.MAX_EPOCHS`` epochs.

    Per-epoch validation is currently disabled: the evaluator is still
    constructed, but the handler that would run it is commented out.

    The per-iteration logger relies on a module-level counter ``ITER`` that
    it increments and resets itself (presumably initialised to 0 at module
    import -- confirm against the rest of the file).

    Args:
        cfg: Configuration object. Reads ``SOLVER.LOG_PERIOD``,
            ``SOLVER.CHECKPOINT_PERIOD``, ``SOLVER.EVAL_PERIOD``,
            ``SOLVER.MAX_EPOCHS``, ``OUTPUT_DIR``, ``MODEL.DEVICE``,
            ``MODEL.NAME``, ``TEST.FEAT_NORM``, ``DATALOADER.SAMPLER``,
            ``DATALOADER.NUM_CAMERA``, ``DATALOADER.NUM_IDS`` and
            ``DATALOADER.NUM_INSTANCE``.
        model: Model handed to the trainer/evaluator factories and stored in
            every checkpoint under the key ``'model'``.
        train_loader: Iterable the trainer runs over. ``len(train_loader)``
            and ``train_loader.batch_size`` are used for logging.
        val_loader: Validation iterable; unused while validation is disabled.
        optimizer: Optimizer handed to the trainer factory and stored in every
            checkpoint under the key ``'optimizer'``.
        scheduler: Learning-rate scheduler. ``step()`` is called at the start
            of every epoch and ``get_lr()[0]`` is logged as the base LR.
        loss_fn: Loss callable handed to the trainer factory.
        num_query: Forwarded to ``R1_mAP`` for the evaluator.
        start_epoch: Epoch number written to ``engine.state.epoch`` when the
            run starts.
        num_classes: Sequence indexed by camera; ``num_classes[i]`` is the
            number of identity labels enumerated for camera ``i``.
    """
    log_period = cfg.SOLVER.LOG_PERIOD
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    # Only referenced by the disabled validation handler further down.
    eval_period = cfg.SOLVER.EVAL_PERIOD
    output_dir = cfg.OUTPUT_DIR
    device = cfg.MODEL.DEVICE
    epochs = cfg.SOLVER.MAX_EPOCHS

    # One mapping per camera: each local identity ``j`` of camera ``i`` starts
    # out associated only with itself, stored as the pair ``(j, i)``.
    id_association = []
    for cam in range(cfg.DATALOADER.NUM_CAMERA):
        per_camera = defaultdict(list)
        for pid in range(num_classes[cam]):
            per_camera[pid].append((pid, cam))
        id_association.append(per_camera)

    logger = logging.getLogger("reid_baseline.train")
    logger.info("Start training")

    if cfg.DATALOADER.SAMPLER == 'softmax_multi_camera':
        trainer = create_supervised_trainer_multi_camera(
            model,
            optimizer,
            loss_fn,
            id_association,
            cfg.DATALOADER.NUM_CAMERA,
            cfg.DATALOADER.NUM_IDS,
            cfg.DATALOADER.NUM_INSTANCE,
            device=device)
    else:
        trainer = create_supervised_trainer(model, optimizer, loss_fn,
                                            device=device)

    # Kept although validation is disabled below: the factory call may have
    # side effects on ``model`` (e.g. device placement) -- confirm before
    # removing.
    evaluator = create_supervised_evaluator(
        model,
        metrics={
            'r1_mAP': R1_mAP(num_query, max_rank=50,
                             feat_norm=cfg.TEST.FEAT_NORM)
        },
        device=device)
    checkpointer = ModelCheckpoint(output_dir,
                                   cfg.MODEL.NAME,
                                   checkpoint_period,
                                   n_saved=10,
                                   require_empty=False)
    timer = Timer(average=True)

    # Handlers for the same event fire in registration order, so the
    # checkpointer runs before the epoch summary handler.
    trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer, {
        'model': model,
        'optimizer': optimizer
    })
    # Time only the span between ITERATION_STARTED and ITERATION_COMPLETED,
    # counting one step per completed iteration.
    timer.attach(trainer,
                 start=Events.EPOCH_STARTED,
                 resume=Events.ITERATION_STARTED,
                 pause=Events.ITERATION_COMPLETED,
                 step=Events.ITERATION_COMPLETED)

    # average metric to attach on trainer
    # The trainer's step output is indexed: [0] is logged as the loss and
    # [1] as the accuracy.
    RunningAverage(output_transform=lambda x: x[0]).attach(trainer, 'avg_loss')
    RunningAverage(output_transform=lambda x: x[1]).attach(trainer, 'avg_acc')

    @trainer.on(Events.STARTED)
    def start_training(engine):
        # Continue epoch numbering from ``start_epoch``.
        engine.state.epoch = start_epoch

    @trainer.on(Events.EPOCH_STARTED)
    def adjust_learning_rate(engine):
        scheduler.step()

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        # Module-level per-epoch batch counter; wraps to 0 after the last
        # batch of the epoch.
        global ITER
        ITER += 1

        if ITER % log_period == 0:
            logger.info(
                "Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, Base Lr: {:.2e}"
                .format(engine.state.epoch, ITER, len(train_loader),
                        engine.state.metrics['avg_loss'],
                        engine.state.metrics['avg_acc'],
                        scheduler.get_lr()[0]))
        if len(train_loader) == ITER:
            ITER = 0

    # adding handlers using `trainer.on` decorator API
    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        # The timer is created with average=True, so value() is already the
        # mean time per step. Report it as-is: multiplying by step_count would
        # print the epoch total under the "Time per batch" label.
        time_per_batch = timer.value()
        logger.info(
            'Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]'
            .format(engine.state.epoch, time_per_batch,
                    train_loader.batch_size / time_per_batch))
        logger.info('-' * 10)
        timer.reset()

    # NOTE: per-epoch validation is intentionally disabled; the handler is
    # kept here for reference.
    # @trainer.on(Events.EPOCH_COMPLETED)
    # def log_validation_results(engine):
    #     if engine.state.epoch % eval_period == 0:
    #         evaluator.run(val_loader)
    #         cmc, mAP = evaluator.state.metrics['r1_mAP']
    #         logger.info(
    #             "Validation Results - Epoch: {}".format(engine.state.epoch))
    #         logger.info("mAP: {:.1%}".format(mAP))
    #         for r in [1, 5, 10]:
    #             logger.info(
    #                 "CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))

    trainer.run(train_loader, max_epochs=epochs)