def train(train_loader, num_classes):
    parser = argparse.ArgumentParser(description="ReID Baseline Training")
    parser.add_argument("--config_file", default="", help="path to config file", type=str)
    parser.add_argument("opts", help="Modify config options using the command-line",
                        default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()

    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    set_seed(cfg.SOLVER.SEED)

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    logger = setup_logger("reid_baseline", output_dir, if_train=True)
    logger.info("Saving model in the path :{}".format(cfg.OUTPUT_DIR))
    logger.info(args)

    if args.config_file != "":
        logger.info("Loaded configuration file {}".format(args.config_file))
        with open(args.config_file, 'r') as cf:
            config_str = "\n" + cf.read()
            logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    os.environ['CUDA_VISIBLE_DEVICES'] = cfg.MODEL.DEVICE_ID

    if cfg.MODEL.PRETRAIN_CHOICE == 'finetune':
        model = make_model(cfg, num_class=num_classes)
        model.load_param_finetune(cfg.MODEL.PRETRAIN_PATH)
        print('Loading pretrained model for finetuning......')
    else:
        model = make_model(cfg, num_class=num_classes)

    loss_func = make_loss(cfg, num_classes=num_classes)
    optimizer = make_optimizer(cfg, model)
    scheduler = WarmupCosineAnnealingLR(optimizer, cfg.SOLVER.MAX_EPOCHS, cfg.SOLVER.DELAY_ITERS,
                                        cfg.SOLVER.ETA_MIN_LR, cfg.SOLVER.WARMUP_FACTOR,
                                        cfg.SOLVER.WARMUP_EPOCHS, cfg.SOLVER.WARMUP_METHOD)
    logger.info("use WarmupCosineAnnealingLR, delay_step:{}".format(cfg.SOLVER.DELAY_ITERS))

    do_train(
        cfg,
        model,
        train_loader,
        optimizer,
        scheduler,  # modify for using self trained model
        loss_func
    )
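# `WarmupCosineAnnealingLR` above is a project-specific scheduler, not a stock
# PyTorch one. As a rough illustration only, here is a minimal sketch of the
# schedule the name suggests, built on `torch.optim.lr_scheduler.LambdaLR`
# (linear warmup followed by cosine decay to a floor). The delay-iters and
# warmup-method options of the original are simplified away, and all names
# below are assumptions, not the project's API.
import math
from torch.optim.lr_scheduler import LambdaLR

def warmup_cosine_sketch(optimizer, max_epochs, warmup_epochs, warmup_factor, eta_min_ratio):
    """eta_min_ratio: floor LR expressed as a fraction of the base LR."""
    def factor(epoch):
        if epoch < warmup_epochs:
            # linear warmup from warmup_factor up to 1
            alpha = epoch / max(1, warmup_epochs)
            return warmup_factor * (1 - alpha) + alpha
        # cosine decay from 1 down to eta_min_ratio
        t = (epoch - warmup_epochs) / max(1, max_epochs - warmup_epochs)
        return eta_min_ratio + (1 - eta_min_ratio) * 0.5 * (1 + math.cos(math.pi * t))
    return LambdaLR(optimizer, lr_lambda=factor)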
def main():
    parser = argparse.ArgumentParser(description="Baseline Training")
    parser.add_argument("--config_file", default="", help="path to config file", type=str)
    parser.add_argument("opts", help="Modify config options using the command-line",
                        default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()

    if args.config_file != "":
        cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    num_gpus = 0
    device = torch.device("cpu")
    if cfg.MODEL.DEVICE == 'cuda' and torch.cuda.is_available():
        num_gpus = len(cfg.MODEL.DEVICE_IDS) - 1
        device_ids = cfg.MODEL.DEVICE_IDS.strip("d")
        print(device_ids)
        device = torch.device("cuda:{0}".format(device_ids))

    logger = setup_logger('baseline', output_dir, 0)
    logger.info('Using {} GPUS'.format(num_gpus))
    logger.info('Running with config:\n{}'.format(cfg))

    train_dl, val_dl = make_dataloader(cfg, num_gpus)
    model = build_model(cfg)
    loss = make_loss(cfg, device)
    trainer = BaseTrainer(cfg, model, train_dl, val_dl, loss, num_gpus, device)

    logger.info(type(model))
    logger.info(loss)
    logger.info(trainer)

    for epoch in range(trainer.epochs):
        for batch in trainer.train_dl:
            trainer.step(batch)
            trainer.handle_new_batch()
        trainer.handle_new_epoch()
def train(config_file, **kwargs):
    cfg.merge_from_file(config_file)
    if kwargs:
        opts = []
        for k, v in kwargs.items():
            opts.append(k)
            opts.append(v)
        cfg.merge_from_list(opts)
    cfg.freeze()

    # PersonReID_Dataset_Downloader('./datasets', cfg.DATASETS.NAMES)

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    logger = make_logger("Reid_Baseline", output_dir, 'log')
    logger.info("Using {} GPUS".format(1))
    logger.info("Loaded configuration file {}".format(config_file))
    logger.info("Running with config:\n{}".format(cfg))

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    eval_period = cfg.SOLVER.EVAL_PERIOD
    output_dir = cfg.OUTPUT_DIR
    device = torch.device(cfg.DEVICE)
    epochs = cfg.SOLVER.MAX_EPOCHS
    method = cfg.DATALOADER.SAMPLER

    train_loader, val_loader, num_query, num_classes = data_loader(cfg, cfg.DATASETS.NAMES)

    model = getattr(models, cfg.MODEL.NAME)(num_classes, cfg.MODEL.LAST_STRIDE)
    if 'center' in method:
        loss_fn, center_criterion = make_loss(cfg)
        optimizer, optimizer_center = make_optimizer_with_center(cfg, model, center_criterion)
    else:
        loss_fn = make_loss(cfg)
        optimizer = make_optimizer(cfg, model)
    scheduler = make_scheduler(cfg, optimizer)

    logger.info("Start training")
    since = time.time()
    for epoch in range(epochs):
        count = 0
        running_loss = 0.0
        running_acc = 0
        for data in tqdm(train_loader, desc='Iteration', leave=False):
            model.train()
            images, labels = data
            if device:
                model.to(device)
                images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            if 'center' in method:
                optimizer_center.zero_grad()

            scores, feats = model(images)
            loss = loss_fn(scores, feats, labels)
            loss.backward()
            optimizer.step()
            if 'center' in method:
                for param in center_criterion.parameters():
                    param.grad.data *= (1. / cfg.SOLVER.CENTER_LOSS_WEIGHT)
                optimizer_center.step()

            count = count + 1
            running_loss += loss.item()
            running_acc += (scores.max(1)[1] == labels).float().mean().item()

        logger.info("Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, Base Lr: {:.2e}"
                    .format(epoch + 1, count, len(train_loader),
                            running_loss / count, running_acc / count,
                            scheduler.get_lr()[0]))
        scheduler.step()

        if (epoch + 1) % checkpoint_period == 0:
            model.cpu()
            model.save(output_dir, epoch + 1)

        # Validation
        if (epoch + 1) % eval_period == 0:
            all_feats = []
            all_pids = []
            all_camids = []
            for data in tqdm(val_loader, desc='Feature Extraction', leave=False):
                model.eval()
                with torch.no_grad():
                    images, pids, camids = data
                    if device:
                        model.to(device)
                        images = images.to(device)
                    feats = model(images)
                all_feats.append(feats)
                all_pids.extend(np.asarray(pids))
                all_camids.extend(np.asarray(camids))

            logger.info("start evaluation")
            cmc, mAP = evaluation(all_feats, all_pids, all_camids, num_query)
            logger.info("Validation Results - Epoch: {}".format(epoch + 1))
            logger.info("mAP: {:.1%}".format(mAP))
            for r in [1, 5, 10]:
                logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))

    time_elapsed = time.time() - since
    logger.info('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    logger.info('-' * 10)
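# The `param.grad.data *= (1. / cfg.SOLVER.CENTER_LOSS_WEIGHT)` step above undoes
# the weight that scales the center loss inside the total objective, so the class
# centers are updated at their own, unscaled rate. The project's `make_loss` is
# not shown; as a hedged sketch of the center-loss component it presumably builds
# (names here are illustrative, not the repo's API):
import torch
import torch.nn as nn

class CenterLossSketch(nn.Module):
    """Pulls each feature toward a learnable center of its class."""
    def __init__(self, num_classes, feat_dim):
        super().__init__()
        self.centers = nn.Parameter(torch.randn(num_classes, feat_dim))

    def forward(self, feats, labels):
        # mean squared distance between each feature and its class center
        return ((feats - self.centers[labels]) ** 2).sum(dim=1).mean()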
os.environ['CUDA_VISIBLE_DEVICES'] = Cfg.MODEL.DEVICE_ID
cudnn.benchmark = True  # This flag allows you to enable the inbuilt cudnn auto-tuner to find the best algorithm to use for your hardware.

train_loader, val_loader = make_dataloader(Cfg)
model_G, model_Dip, model_Dii, model_D_reid = make_model(Cfg)

optimizerG = make_optimizer(Cfg, model_G)
optimizerDip = make_optimizer(Cfg, model_Dip)
optimizerDii = make_optimizer(Cfg, model_Dii)

schedulerG = WarmupMultiStepLR(optimizerG, Cfg.SOLVER.STEPS, Cfg.SOLVER.GAMMA,
                               Cfg.SOLVER.WARMUP_FACTOR, Cfg.SOLVER.WARMUP_EPOCHS,
                               Cfg.SOLVER.WARMUP_METHOD)
schedulerDip = WarmupMultiStepLR(optimizerDip, Cfg.SOLVER.STEPS, Cfg.SOLVER.GAMMA,
                                 Cfg.SOLVER.WARMUP_FACTOR, Cfg.SOLVER.WARMUP_EPOCHS,
                                 Cfg.SOLVER.WARMUP_METHOD)
schedulerDii = WarmupMultiStepLR(optimizerDii, Cfg.SOLVER.STEPS, Cfg.SOLVER.GAMMA,
                                 Cfg.SOLVER.WARMUP_FACTOR, Cfg.SOLVER.WARMUP_EPOCHS,
                                 Cfg.SOLVER.WARMUP_METHOD)

GAN_loss, L1_loss, ReID_loss = make_loss(Cfg)

do_train(Cfg, model_G, model_Dip, model_Dii, model_D_reid,
         train_loader, val_loader,
         optimizerG, optimizerDip, optimizerDii,
         GAN_loss, L1_loss, ReID_loss,
         schedulerG, schedulerDip, schedulerDii)
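# `WarmupMultiStepLR` is the staple scheduler across these scripts. A minimal
# sketch of the schedule it names (linear warmup, then multiply the LR by gamma
# at each milestone), again via `LambdaLR`; this is an assumption about its
# behavior, not the repo's implementation:
from bisect import bisect_right
from torch.optim.lr_scheduler import LambdaLR

def warmup_multistep_sketch(optimizer, milestones, gamma, warmup_factor, warmup_epochs):
    """milestones: sorted list of epochs at which the LR is multiplied by gamma."""
    def factor(epoch):
        warmup = 1.0
        if epoch < warmup_epochs:
            alpha = epoch / max(1, warmup_epochs)
            warmup = warmup_factor * (1 - alpha) + alpha  # the "linear" warmup method
        return warmup * gamma ** bisect_right(milestones, epoch)
    return LambdaLR(optimizer, lr_lambda=factor)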
if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
    os.makedirs(log_dir)
set_logger(logger, log_dir)

log_file = os.path.join(log_dir, opt.version + '.txt')
with open(log_file, 'a') as f:
    f.write(str(opt) + '\n')
    f.flush()

os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus
cudnn.benchmark = True

data = Data()
model = build_model(opt, data.num_classes)
optimizer = make_optimizer(opt, model)
loss = make_loss(opt, data.num_classes)

# WARMUP_FACTOR: 0.01
# WARMUP_ITERS: 10
scheduler = WarmupMultiStepLR(optimizer, opt.steps, 0.1, 0.01, 10, "linear")

main = Main(opt, model, data, optimizer, scheduler, loss)

if opt.mode == 'train':
    # total number of training epochs
    epoch = 200
    start_epoch = 1
    # resume training from a checkpoint
    if opt.resume:
def main():
    """ Configs """
    args = get_parser().parse_args()

    if not os.path.exists(args.exp_root):
        os.makedirs(args.exp_root)

    if torch.cuda.is_available() and not args.cuda:
        print("\nStrongly recommend running with '--cuda' if you have a CUDA-capable device.")

    # print configs
    print('=' * 40)
    print('Dataset: {}'.format(args.dataset))
    print('Model: ResNet-50')
    print('Optimizer: Adam')
    print('Image height: {}'.format(args.img_height))
    print('Image width: {}'.format(args.img_width))
    print('Loss: {}'.format(args.loss_type))
    if args.loss_type == 'softmax-triplet':
        print('  alpha: {}'.format(args.alpha))
    if args.loss_type in ['contrastive', 'triplet', 'dmml']:
        print('  margin: {}'.format(args.margin))
    print('  class number: {}'.format(args.num_classes))
    if args.loss_type == 'npair':
        pass
    elif args.loss_type == 'dmml':
        print('  support number: {}'.format(args.num_support))
        print('  query number: {}'.format(args.num_query))
        print('  distance_mode: {}'.format(args.distance_mode))
    else:
        print('  instance number: {}'.format(args.num_instances))
    print('Epochs: {}'.format(args.num_epochs))
    print('Learning rate: {}'.format(args.lr))
    print('  decay beginning epoch: {}'.format(args.lr_decay_start_epoch))
    print('Weight decay: {}'.format(args.weight_decay))
    if args.cuda:
        print('GPU(s): {}'.format(args.gpu))
    print('=' * 40)

    """ Initialization """
    print('Initializing...')
    if args.cuda:
        gpus = ''.join(args.gpu.split())
        gids = [int(gid) for gid in gpus.split(',')]
    else:
        gids = None
    init_seed(args, gids)

    model = make_model(args, gids)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    criterion = make_loss(args, gids)
    print('Done.')

    """ Training """
    print('Starting training...')
    train(args, model, optimizer, criterion, gids)
    print('Training completed.')
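# `init_seed(args, gids)` is not shown in this excerpt. A typical implementation,
# sketched here under that assumption, seeds Python, NumPy and PyTorch (and every
# visible GPU) for reproducibility:
import random
import numpy as np
import torch

def init_seed_sketch(seed, gids=None):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if gids:
        torch.cuda.manual_seed_all(seed)  # seed all visible GPUs too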
def train(config_file, **kwargs):
    # 1. config
    cfg.merge_from_file(config_file)
    if kwargs:
        opts = []
        for k, v in kwargs.items():
            opts.append(k)
            opts.append(v)
        cfg.merge_from_list(opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    logger = make_logger("Reid_Baseline", output_dir, 'log')
    logger.info("Using {} GPUS".format(1))
    logger.info("Loaded configuration file {}".format(config_file))
    logger.info("Running with config:\n{}".format(cfg))

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    eval_period = cfg.SOLVER.EVAL_PERIOD
    device = torch.device(cfg.DEVICE)
    epochs = cfg.SOLVER.MAX_EPOCHS

    # 2. datasets
    # Load the original dataset
    dataset_reference = init_dataset(cfg, cfg.DATASETS.NAMES + '_origin')  # 'Market1501_origin'
    train_set_reference = ImageDataset(dataset_reference.train, train_transforms)
    train_loader_reference = DataLoader(train_set_reference, batch_size=128, shuffle=False,
                                        num_workers=cfg.DATALOADER.NUM_WORKERS,
                                        collate_fn=train_collate_fn)
    # Load the one-shot dataset
    train_loader, val_loader, num_query, num_classes = data_loader(cfg, cfg.DATASETS.NAMES)

    # 3. load the model and optimizer
    model = getattr(models, cfg.MODEL.NAME)(num_classes)
    optimizer = make_optimizer(cfg, model)
    scheduler = make_scheduler(cfg, optimizer)
    loss_fn = make_loss(cfg)
    logger.info("Start training")
    since = time.time()

    top = 0  # the choice of the nearest sample
    top_update = 0  # the first iteration trains 80 steps; the following ones train 40

    # 4. Train and test
    for epoch in range(epochs):
        running_loss = 0.0
        running_acc = 0
        count = 1

        # get nearest samples and reset the model
        if top_update < 80:
            train_step = 80
        else:
            train_step = 40
        if top_update % train_step == 0:
            print("top: ", top)
            A, path_labeled = PSP(model, train_loader_reference, train_loader, top, cfg)
            top += cfg.DATALOADER.NUM_JUMP
            model = getattr(models, cfg.MODEL.NAME)(num_classes)
            optimizer = make_optimizer(cfg, model)
            scheduler = make_scheduler(cfg, optimizer)
            A_store = A.clone()
        top_update += 1

        for data in tqdm(train_loader, desc='Iteration', leave=False):
            model.train()
            images, labels_batch, img_path = data
            index, index_labeled = find_index_by_path(img_path, dataset_reference.train, path_labeled)
            images_relevant, GCN_index, choose_from_nodes, labels = load_relevant(
                cfg, dataset_reference.train, index, A_store, labels_batch, index_labeled)

            # if device:
            model.to(device)
            images = images_relevant.to(device)

            scores, feat = model(images)
            del images
            loss = loss_fn(scores, feat, labels.to(device), choose_from_nodes)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            count = count + 1
            running_loss += loss.item()
            running_acc += (scores[choose_from_nodes].max(1)[1].cpu() == labels_batch).float().mean().item()

        scheduler.step()

        # for model save if you need
        # if (epoch+1) % checkpoint_period == 0:
        #     model.cpu()
        #     model.save(output_dir, epoch+1)

        # Validation
        if (epoch + 1) % eval_period == 0:
            all_feats = []
            all_pids = []
            all_camids = []
            for data in tqdm(val_loader, desc='Feature Extraction', leave=False):
                model.eval()
                with torch.no_grad():
                    images, pids, camids = data
                    model.to(device)
                    images = images.to(device)
                    feats = model(images)
                    del images
                all_feats.append(feats.cpu())
                all_pids.extend(np.asarray(pids))
                all_camids.extend(np.asarray(camids))

            cmc, mAP = evaluation(all_feats, all_pids, all_camids, num_query)
            logger.info("Validation Results - Epoch: {}".format(epoch + 1))
            logger.info("mAP: {:.1%}".format(mAP))
            for r in [1, 5, 10, 20]:
                logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))

    time_elapsed = time.time() - since
    logger.info('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    logger.info('-' * 10)
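# The `evaluation(all_feats, all_pids, all_camids, num_query)` helper used above
# is project code. For reference, a minimal sketch of what such a ReID metric
# computation typically does: split query/gallery at num_query, drop same-identity
# same-camera gallery entries, then accumulate CMC and mAP. A simplification under
# those assumptions, not the repo's implementation:
import numpy as np
import torch
import torch.nn.functional as F

def evaluation_sketch(all_feats, all_pids, all_camids, num_query, max_rank=20):
    feats = F.normalize(torch.cat(all_feats, dim=0), dim=1)
    qf, gf = feats[:num_query], feats[num_query:]
    q_pids, g_pids = np.asarray(all_pids[:num_query]), np.asarray(all_pids[num_query:])
    q_cams, g_cams = np.asarray(all_camids[:num_query]), np.asarray(all_camids[num_query:])
    indices = torch.cdist(qf, gf).numpy().argsort(axis=1)  # gallery sorted by distance

    all_cmc, all_ap = [], []
    for i in range(num_query):
        order = indices[i]
        # discard gallery images of the same identity seen by the same camera
        keep = ~((g_pids[order] == q_pids[i]) & (g_cams[order] == q_cams[i]))
        matches = (g_pids[order][keep] == q_pids[i]).astype(np.int32)
        if not matches.any():
            continue  # this query identity never appears in the gallery
        cmc = matches.cumsum()
        cmc[cmc > 1] = 1
        all_cmc.append(cmc[:max_rank])
        precision = matches.cumsum() / (np.arange(len(matches)) + 1.0)
        all_ap.append((precision * matches).sum() / matches.sum())
    return np.stack(all_cmc).mean(axis=0), float(np.mean(all_ap))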
from loss import make_loss
from processor import do_train
from solver import make_optimizer, WarmupMultiStepLR
from utils.logger import setup_logger

if __name__ == '__main__':
    Cfg = Configuration()
    log_dir = Cfg.DATALOADER.LOG_DIR
    logger = setup_logger('{}'.format(Cfg.PROJECT_NAME), log_dir)
    logger.info("Running with config:\n{}".format(Cfg.PROJECT_NAME))

    os.environ['CUDA_VISIBLE_DEVICES'] = Cfg.DEVICE_ID
    cudnn.benchmark = True  # This flag allows you to enable the inbuilt cudnn auto-tuner to find the best algorithm to use for your hardware.

    train_loader, val_loader = make_dataloader(Cfg)
    model = make_model(Cfg)
    optimizer = make_optimizer(Cfg, model)
    scheduler = WarmupMultiStepLR(Cfg, optimizer)
    loss_func = make_loss(Cfg)

    do_train(
        Cfg,
        model,
        train_loader,
        val_loader,
        optimizer,
        scheduler,  # modify for using self trained model
        loss_func,
    )
from datasets import make_dataloader
from model import make_model
from solver import make_optimizer, WarmupMultiStepLR
from loss import make_loss
from processor import do_train

if __name__ == '__main__':
    Cfg = Configuration()
    log_dir = Cfg.LOG_DIR
    logger = setup_logger('{}'.format(Cfg.PROJECT_NAME), log_dir)

    os.environ['CUDA_VISIBLE_DEVICES'] = Cfg.DEVICE_ID
    cudnn.benchmark = True  # This flag allows you to enable the inbuilt cudnn auto-tuner to find the best algorithm to use for your hardware.

    train_loader, test_loader = make_dataloader(Cfg)
    model = make_model(Cfg)
    optimizer = make_optimizer(Cfg, model)
    scheduler = WarmupMultiStepLR(optimizer, Cfg.SOLVER_STEPS, Cfg.LR_DECAY_FACTOR,
                                  Cfg.SOLVER_WARMUP_FACTOR, Cfg.SOLVER_WARMUP_EPOCHS,
                                  Cfg.SOLVER_WARMUP_METHOD)
    loss_func = make_loss(Cfg, num_classes=2)

    do_train(Cfg,
             model,
             train_loader,
             test_loader,
             optimizer,
             scheduler,  # modify for using self trained model
             loss_func)
os.environ['CUDA_VISIBLE_DEVICES'] = Cfg.MODEL.DEVICE_ID
cudnn.benchmark = True  # This flag allows you to enable the inbuilt cudnn auto-tuner to find the best algorithm to use for your hardware.

train_loader, val_loader = make_dataloader(Cfg)
model_G, model_Dip, model_Dii = make_model(Cfg)

optimizerG = make_optimizer(Cfg, model_G)
optimizerDip = make_optimizer(Cfg, model_Dip)
optimizerDii = make_optimizer(Cfg, model_Dii)

schedulerG = WarmupMultiStepLR(optimizerG, Cfg.SOLVER.STEPS, Cfg.SOLVER.GAMMA,
                               Cfg.SOLVER.WARMUP_FACTOR, Cfg.SOLVER.WARMUP_EPOCHS,
                               Cfg.SOLVER.WARMUP_METHOD)
schedulerDip = WarmupMultiStepLR(optimizerDip, Cfg.SOLVER.STEPS, Cfg.SOLVER.GAMMA,
                                 Cfg.SOLVER.WARMUP_FACTOR, Cfg.SOLVER.WARMUP_EPOCHS,
                                 Cfg.SOLVER.WARMUP_METHOD)
schedulerDii = WarmupMultiStepLR(optimizerDii, Cfg.SOLVER.STEPS, Cfg.SOLVER.GAMMA,
                                 Cfg.SOLVER.WARMUP_FACTOR, Cfg.SOLVER.WARMUP_EPOCHS,
                                 Cfg.SOLVER.WARMUP_METHOD)

GAN_loss, L1_loss = make_loss(Cfg)

do_train(Cfg, model_G, model_Dip, model_Dii,
         train_loader, val_loader,
         optimizerG, optimizerDip, optimizerDii,
         GAN_loss, L1_loss,
         schedulerG, schedulerDip, schedulerDii)
def train(config_file, resume=False, **kwargs):
    cfg.merge_from_file(config_file)
    if kwargs:
        opts = []
        for k, v in kwargs.items():
            opts.append(k)
            opts.append(v)
        cfg.merge_from_list(opts)
    cfg.freeze()

    # [PersonReID_Dataset_Downloader(cfg.DATASETS.STORE_DIR, dataset) for dataset in cfg.DATASETS.SOURCE]
    # [PersonReID_Dataset_Downloader(cfg.DATASETS.STORE_DIR, dataset) for dataset in cfg.DATASETS.TARGET]

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    logger = make_logger("Reid_Baseline", output_dir, 'log', resume)
    if not resume:
        logger.info("Using {} GPUS".format(1))
        logger.info("Loaded configuration file {}".format(config_file))
        logger.info("Running with config:\n{}".format(cfg))

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    eval_period = cfg.SOLVER.EVAL_PERIOD
    output_dir = cfg.OUTPUT_DIR
    device = torch.device(cfg.DEVICE)
    epochs = cfg.SOLVER.MAX_EPOCHS

    train_loader, _, _, num_classes = data_loader(cfg, cfg.DATASETS.SOURCE, merge=cfg.DATASETS.MERGE)

    model = getattr(models, cfg.MODEL.NAME)(num_classes, cfg.MODEL.LAST_STRIDE, cfg.MODEL.POOL)
    if resume:
        checkpoints = get_last_stats(output_dir)
        try:
            model_dict = torch.load(checkpoints[cfg.MODEL.NAME])
        except KeyError:
            model_dict = torch.load(checkpoints[str(type(model))])
        model.load_state_dict(model_dict)
    if device:
        model.to(device)  # must be done before the optimizer generation

    optimizer = make_optimizer(cfg, model)
    scheduler = make_scheduler(cfg, optimizer)
    base_epo = 0
    if resume:
        optimizer.load_state_dict(torch.load(checkpoints['opt']))
        sch_dict = torch.load(checkpoints['sch'])
        scheduler.load_state_dict(sch_dict)
        base_epo = checkpoints['epo']

    loss_fn = make_loss(cfg)

    if not resume:
        logger.info("Start training")
    since = time.time()
    for epoch in range(epochs):
        count = 0
        running_loss = 0.0
        running_acc = 0
        for data in tqdm(train_loader, desc='Iteration', leave=False):
            model.train()
            images, labels, domains = data
            if device:
                model.to(device)
                images, labels, domains = images.to(device), labels.to(device), domains.to(device)

            optimizer.zero_grad()
            scores, feats = model(images)
            loss = loss_fn(scores, feats, labels)
            loss.backward()
            optimizer.step()

            count = count + 1
            running_loss += loss.item()
            running_acc += (scores[0].max(1)[1] == labels).float().mean().item()

        logger.info("Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, Base Lr: {:.2e}"
                    .format(epoch + 1 + base_epo, count, len(train_loader),
                            running_loss / count, running_acc / count,
                            scheduler.get_lr()[0]))
        scheduler.step()

        if (epoch + 1 + base_epo) % checkpoint_period == 0:
            model.cpu()
            model.save(output_dir, epoch + 1 + base_epo)
            torch.save(optimizer.state_dict(),
                       os.path.join(output_dir, 'opt_epo' + str(epoch + 1 + base_epo) + '.pth'))
            torch.save(scheduler.state_dict(),
                       os.path.join(output_dir, 'sch_epo' + str(epoch + 1 + base_epo) + '.pth'))

        # Validation on Target Dataset
        if (epoch + base_epo + 1) % eval_period == 0:
            for target in cfg.DATASETS.TARGET:
                mAPs = []
                cmcs = []
                for i in range(iteration):
                    set_seeds(i)
                    _, val_loader, num_query, _ = data_loader(cfg, (target,), merge=False)
                    all_feats = []
                    all_pids = []
                    all_camids = []
                    since = time.time()
                    for data in tqdm(val_loader, desc='Feature Extraction', leave=False):
                        model.eval()
                        with torch.no_grad():
                            images, pids, camids = data
                            if device:
                                model.to(device)
                                images = images.to(device)
                            feats = model(images)
                            feats /= feats.norm(dim=-1, keepdim=True)
                        all_feats.append(feats)
                        all_pids.extend(np.asarray(pids))
                        all_camids.extend(np.asarray(camids))

                    cmc, mAP = evaluation(all_feats, all_pids, all_camids, num_query)
                    mAPs.append(mAP)
                    cmcs.append(cmc)

                mAP = np.mean(np.array(mAPs))
                cmc = np.mean(np.array(cmcs), axis=0)
                mAP_std = np.std(np.array(mAPs))
                cmc_std = np.std(np.array(cmcs), axis=0)

                logger.info("Validation Results: {} - Epoch: {}".format(target, epoch + 1 + base_epo))
                logger.info("mAP: {:.1%} (std: {:.3%})".format(mAP, mAP_std))
                for r in [1, 5, 10]:
                    logger.info("CMC curve, Rank-{:<3}:{:.1%} (std: {:.3%})".format(
                        r, cmc[r - 1], cmc_std[r - 1]))
                reset()

    time_elapsed = time.time() - since
    logger.info('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    logger.info('-' * 10)
train_loader, val_loader_green, val_loader_normal, num_query_green, num_query_normal, num_classes = make_dataloader(cfg)

if cfg.MODEL.PRETRAIN_CHOICE == 'finetune':
    model = make_model(cfg, num_class=num_classes)
    # model = get_efficientnet(model_name=cfg.MODEL.NAME, num_class=num_classes)
    model.load_param_finetune(cfg.MODEL.PRETRAIN_PATH)
    print('Loading pretrained model for finetuning......')
else:
    model = make_model(cfg, num_class=num_classes)
    # model = get_efficientnet(model_name=cfg.MODEL.NAME, num_class=num_classes)
# print(model)

loss_func, center_criterion = make_loss(cfg, num_classes=num_classes, feat_dim=model.in_planes)
optimizer, optimizer_center = make_optimizer(cfg, model, center_criterion)
scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                              cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_EPOCHS,
                              cfg.SOLVER.WARMUP_METHOD)

do_train(
    cfg,
    model,
    center_criterion,
    train_loader,
    val_loader_green,
    optimizer,
def main():
    torch.backends.cudnn.deterministic = True
    cudnn.benchmark = True

    # parser = argparse.ArgumentParser(description="ReID Baseline Training")
    # parser.add_argument("--config_file", default="", help="path to config file", type=str)
    # parser.add_argument("opts", help="Modify config options using the command-line",
    #                     default=None, nargs=argparse.REMAINDER)
    # args = parser.parse_args()

    config_file = 'configs/baseline_veri_r101_a.yml'
    if config_file != "":
        cfg.merge_from_file(config_file)
    # cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    logger = setup_logger("reid_baseline", output_dir, if_train=True)
    logger.info("Saving model in the path :{}".format(cfg.OUTPUT_DIR))
    logger.info(config_file)

    if config_file != "":
        logger.info("Loaded configuration file {}".format(config_file))
        with open(config_file, 'r') as cf:
            config_str = "\n" + cf.read()
            logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    os.environ['CUDA_VISIBLE_DEVICES'] = cfg.MODEL.DEVICE_ID

    path = 'D:/Python_SMU/Veri/verigms/gms/'
    pkl = {}
    entries = os.listdir(path)
    for name in entries:
        with open(path + name, 'rb') as f:
            if name == 'featureMatrix.pkl':
                s = name[0:13]
            else:
                s = name[0:3]
            pkl[s] = pickle.load(f)

    with open('cids.pkl', 'rb') as handle:
        b = pickle.load(handle)
    with open('index.pkl', 'rb') as handle:
        c = pickle.load(handle)

    train_transforms, val_transforms, dataset, train_set, val_set = make_dataset(cfg, pkl_file='index.pkl')

    num_workers = cfg.DATALOADER.NUM_WORKERS
    num_classes = dataset.num_train_pids
    # pkl_f = 'index.pkl'

    pid = 0
    pidx = {}
    for img_path, pid, _, _ in dataset.train:
        path = img_path.split('\\')[-1]
        folder = path[1:4]
        pidx[folder] = pid
        pid += 1

    if 'triplet' in cfg.DATALOADER.SAMPLER:
        train_loader = DataLoader(train_set, batch_size=cfg.SOLVER.IMS_PER_BATCH,
                                  sampler=RandomIdentitySampler(dataset.train,
                                                                cfg.SOLVER.IMS_PER_BATCH,
                                                                cfg.DATALOADER.NUM_INSTANCE),
                                  num_workers=num_workers, pin_memory=True,
                                  collate_fn=train_collate_fn)
    elif cfg.DATALOADER.SAMPLER == 'softmax':
        print('using softmax sampler')
        train_loader = DataLoader(train_set, batch_size=cfg.SOLVER.IMS_PER_BATCH, shuffle=True,
                                  num_workers=num_workers, pin_memory=True,
                                  collate_fn=train_collate_fn)
    else:
        print('unsupported sampler! expected softmax or triplet but got {}'.format(cfg.SAMPLER))
    print("train loader loaded successfully")

    val_loader = DataLoader(val_set, batch_size=cfg.TEST.IMS_PER_BATCH, shuffle=False,
                            num_workers=num_workers, pin_memory=True,
                            collate_fn=train_collate_fn)
    print("val loader loaded successfully")

    if cfg.MODEL.PRETRAIN_CHOICE == 'finetune':
        model = make_model(cfg, num_class=576)
        model.load_param_finetune(cfg.MODEL.PRETRAIN_PATH)
        print('Loading pretrained model for finetuning......')
    else:
        model = make_model(cfg, num_class=num_classes)

    loss_func, center_criterion = make_loss(cfg, num_classes=num_classes)
    optimizer, optimizer_center = make_optimizer(cfg, model, center_criterion)
    scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                                  cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_EPOCHS,
                                  cfg.SOLVER.WARMUP_METHOD)
    print("model, optimizer, loss, scheduler loaded successfully")

    height, width = cfg.INPUT.SIZE_TRAIN
    log_period = cfg.SOLVER.LOG_PERIOD
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    eval_period = cfg.SOLVER.EVAL_PERIOD
    device = "cuda"
    epochs = cfg.SOLVER.MAX_EPOCHS

    logger = logging.getLogger("reid_baseline.train")
    logger.info('start training')
    if device:
        if torch.cuda.device_count() > 1:
            print('Using {} GPUs for training'.format(torch.cuda.device_count()))
            model = nn.DataParallel(model)
        model.to(device)

    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    evaluator = R1_mAP_eval(len(dataset.query), max_rank=50, feat_norm=cfg.TEST.FEAT_NORM)

    model.base._freeze_stages()
    logger.info('Freezing the stages number:{}'.format(cfg.MODEL.FROZEN))

    data_index = search(pkl)
    print("Ready for training")

    for epoch in range(1, epochs + 1):
        start_time = time.time()
        loss_meter.reset()
        acc_meter.reset()
        evaluator.reset()
        scheduler.step()
        model.train()

        for n_iter, (img, label, index, pid, cid) in enumerate(train_loader):
            optimizer.zero_grad()
            optimizer_center.zero_grad()
            # img = img.to(device)
            # target = vid.to(device)

            # assemble anchor / positive / negative triplets for the whole batch
            trainX = torch.zeros((train_loader.batch_size * 3, 3, height, width), dtype=torch.float32)
            trainY = torch.zeros((train_loader.batch_size * 3), dtype=torch.int64)
            for i in range(train_loader.batch_size):
                labelx = label[i]
                indexx = index[i]
                cidx = pid[i]
                if indexx > len(pkl[labelx]) - 1:
                    indexx = len(pkl[labelx]) - 1

                a = pkl[labelx][indexx]
                minpos = np.argmin(ma.masked_where(a == 0, a))
                pos_dic = train_set[data_index[cidx][1] + minpos]
                # print(pos_dic[1])

                neg_label = int(labelx)
                while True:
                    neg_label = random.choice(range(1, 770))
                    if neg_label != int(labelx) and os.path.isdir(
                            os.path.join('D:/datasets/veri-split/train', strint(neg_label))):
                        break
                negative_label = strint(neg_label)
                neg_cid = pidx[negative_label]
                neg_index = random.choice(range(0, len(pkl[negative_label])))
                neg_dic = train_set[data_index[neg_cid][1] + neg_index]

                trainX[i] = img[i]
                trainX[i + train_loader.batch_size] = pos_dic[0]
                trainX[i + (train_loader.batch_size * 2)] = neg_dic[0]
                trainY[i] = cidx
                trainY[i + train_loader.batch_size] = pos_dic[3]
                trainY[i + (train_loader.batch_size * 2)] = neg_dic[3]
            # print(trainY)

            trainX = trainX.cuda()
            trainY = trainY.cuda()
            score, feat = model(trainX, trainY)
            loss = loss_func(score, feat, trainY)
            loss.backward()
            optimizer.step()
            if 'center' in cfg.MODEL.METRIC_LOSS_TYPE:
                for param in center_criterion.parameters():
                    param.grad.data *= (1. / cfg.SOLVER.CENTER_LOSS_WEIGHT)
                optimizer_center.step()

            acc = (score.max(1)[1] == trainY).float().mean()
            loss_meter.update(loss.item(), img.shape[0])
            acc_meter.update(acc, 1)
            if (n_iter + 1) % log_period == 0:
                logger.info("Epoch[{}] Iteration[{}/{}] Loss: {:.3f}, Acc: {:.3f}, Base Lr: {:.2e}"
                            .format(epoch, (n_iter + 1), len(train_loader),
                                    loss_meter.avg, acc_meter.avg, scheduler.get_lr()[0]))

        end_time = time.time()
        time_per_batch = (end_time - start_time) / (n_iter + 1)
        logger.info("Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]"
                    .format(epoch, time_per_batch, train_loader.batch_size / time_per_batch))

        if epoch % checkpoint_period == 0:
            torch.save(model.state_dict(),
                       os.path.join(cfg.OUTPUT_DIR, cfg.MODEL.NAME + '_{}.pth'.format(epoch)))

        if epoch % eval_period == 0:
            model.eval()
            for n_iter, (img, vid, camid, _, _) in enumerate(val_loader):
                with torch.no_grad():
                    img = img.to(device)
                    feat = model(img)
                    evaluator.update((feat, vid, camid))
            cmc, mAP, _, _, _, _, _ = evaluator.compute()
            logger.info("Validation Results - Epoch: {}".format(epoch))
            logger.info("mAP: {:.1%}".format(mAP))
            for r in [1, 5, 10]:
                logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))
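# `AverageMeter` above is the classic running-average utility; in case it is not
# available in your environment, an equivalent minimal version looks like this:
class AverageMeterSketch:
    """Tracks the running average of a scalar (loss, accuracy, ...)."""
    def __init__(self):
        self.reset()

    def reset(self):
        self.sum, self.count, self.avg = 0.0, 0, 0.0

    def update(self, val, n=1):
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count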
def train(config_file1, config_file2, **kwargs):
    # 1. config
    cfg.merge_from_file(config_file1)
    if kwargs:
        opts = []
        for k, v in kwargs.items():
            opts.append(k)
            opts.append(v)
        cfg.merge_from_list(opts)
    # cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    logger = make_logger("Reid_Baseline", output_dir, 'log')
    # logger.info("Using {} GPUS".format(1))
    logger.info("Loaded configuration file {}".format(config_file1))
    logger.info("Running with config:\n{}".format(cfg))

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    eval_period = cfg.SOLVER.EVAL_PERIOD
    device = torch.device(cfg.DEVICE)
    epochs = cfg.SOLVER.MAX_EPOCHS

    # 2. datasets
    # Load the original dataset
    # dataset_reference = init_dataset(cfg, cfg.DATASETS.NAMES)
    dataset_reference = init_dataset(cfg, cfg.DATASETS.NAMES + '_origin')  # 'Market1501_origin'
    train_set_reference = ImageDataset(dataset_reference.train, train_transforms)
    # not fed through the network, so no transform is needed
    train_loader_reference = DataLoader(train_set_reference, batch_size=128, shuffle=False,
                                        num_workers=cfg.DATALOADER.NUM_WORKERS,
                                        collate_fn=train_collate_fn)
    # Load the one-shot dataset
    train_loader, val_loader, num_query, num_classes = data_loader(cfg, cfg.DATASETS.NAMES)

    # 3. load the model and optimizer
    model = getattr(models, cfg.MODEL.NAME)(num_classes)
    optimizer = make_optimizer(cfg, model)
    scheduler = make_scheduler(cfg, optimizer)
    loss_fn = make_loss(cfg)
    logger.info("Start training")
    since = time.time()

    if torch.cuda.device_count() > 1:
        print("Use", torch.cuda.device_count(), 'gpus')
    elif torch.cuda.device_count() == 1:
        print("Use", torch.cuda.device_count(), 'gpu')
    model = nn.DataParallel(model)

    top = 0  # the choice of the nearest sample
    top_update = 0  # the first iteration trains 80 steps; the following ones train 40
    train_time = 0  # how many times the GAN has been trained
    bound = 1  # how many GAN training passes to run; revisit once multiple passes are needed
    lock = False
    train_compen = 0

    # 4. Train and test
    for epoch in range(epochs):
        running_loss = 0.0
        running_acc = 0
        count = 1

        # get nearest samples and reset the model
        if top_update < 80:
            # whether the first pass over newly GAN-generated images needs the full
            # 80 steps; check whether the next pass receives fewer images
            train_step = 80
        else:
            train_step = 40
        # if top_update % train_step == 0:
        if top_update % train_step == 0 and train_compen == 0:
            print("top: ", top)
            # in the original experiments top went up to 41; this is a compromise
            # (whether to compromise here is itself worth testing)
            # if 1 == 1:
            if top >= 8 and train_time < bound:
                train_compen = (top - 1) * 40 + 80
                # build_image(A, train_loader_reference, train_loader)
                train_time += 1
                # GAN training mode
                mode = 'train'
                retrain(mode)
                # write GAN-generated images into the original dataset
                produce()

                cfg.merge_from_file(config_file2)
                output_dir = cfg.OUTPUT_DIR
                if output_dir and not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                logger = make_logger("Reid_Baseline", output_dir, 'log')
                logger.info("Loaded configuration file {}".format(config_file2))
                logger.info("Running with config:\n{}".format(cfg))

                dataset_reference = init_dataset(cfg, cfg.DATASETS.NAMES + '_origin')  # 'Market1501_origin'
                train_set_reference = ImageDataset(dataset_reference.train, train_transforms)
                train_loader_reference = DataLoader(train_set_reference, batch_size=128,
                                                    shuffle=False,
                                                    num_workers=cfg.DATALOADER.NUM_WORKERS,
                                                    collate_fn=train_collate_fn)
                dataset_ref = init_dataset(cfg, cfg.DATASETS.NAMES + '_ref')  # 'Market1501_origin'
                train_set_ref = ImageDataset(dataset_ref.train, train_transforms)
                train_loader_ref = DataLoader(train_set_ref, batch_size=128, shuffle=False,
                                              num_workers=cfg.DATALOADER.NUM_WORKERS,
                                              collate_fn=train_collate_fn)
                lock = True

            if lock:
                A, path_labeled = PSP2(model, train_loader_reference, train_loader,
                                       train_loader_ref, top, logger, cfg)
                lock = False
            else:
                A, path_labeled = PSP(model, train_loader_reference, train_loader, top, logger, cfg)
            # vis = len(train_loader_reference.dataset)
            # A = torch.ones(vis, len(train_loader_reference.dataset))
            # build_image(A, train_loader_reference, train_loader)
            top += cfg.DATALOADER.NUM_JUMP
            model = getattr(models, cfg.MODEL.NAME)(num_classes)
            model = nn.DataParallel(model)
            optimizer = make_optimizer(cfg, model)
            scheduler = make_scheduler(cfg, optimizer)
            A_store = A.clone()
        top_update += 1

        for data in tqdm(train_loader, desc='Iteration', leave=False):
            model.train()
            images, labels_batch, img_path = data
            index, index_labeled = find_index_by_path(img_path, dataset_reference.train, path_labeled)
            images_relevant, GCN_index, choose_from_nodes, labels = load_relevant(
                cfg, dataset_reference.train, index, A_store, labels_batch, index_labeled)

            # if device:
            model.to(device)
            images = images_relevant.to(device)

            scores, feat = model(images)
            del images
            loss = loss_fn(scores, feat, labels.to(device), choose_from_nodes)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            count = count + 1
            running_loss += loss.item()
            running_acc += (scores[choose_from_nodes].max(1)[1].cpu() == labels_batch).float().mean().item()

        scheduler.step()

        # for model save if you need
        # if (epoch+1) % checkpoint_period == 0:
        #     model.cpu()
        #     model.save(output_dir, epoch+1)

        # Validation
        if (epoch + 1) % eval_period == 0:
            all_feats = []
            all_pids = []
            all_camids = []
            for data in tqdm(val_loader, desc='Feature Extraction', leave=False):
                model.eval()
                with torch.no_grad():
                    images, pids, camids = data
                    model.to(device)
                    images = images.to(device)
                    feats = model(images)
                    del images
                all_feats.append(feats.cpu())
                all_pids.extend(np.asarray(pids))
                all_camids.extend(np.asarray(camids))

            cmc, mAP = evaluation(all_feats, all_pids, all_camids, num_query)
            logger.info("Validation Results - Epoch: {}".format(epoch + 1))
            logger.info("mAP: {:.1%}".format(mAP))
            for r in [1, 5, 10, 20]:
                logger.info("CMC curve, Rank-{:<3}:{:.1%}".format(r, cmc[r - 1]))

        if train_compen > 0:
            train_compen -= 1

    time_elapsed = time.time() - since
    logger.info('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    logger.info('-' * 10)
with open(args.config_file, 'r') as cf:
    config_str = "\n" + cf.read()
    logger.info(config_str)
logger.info("Running with config:\n{}".format(cfg))

os.environ['CUDA_VISIBLE_DEVICES'] = cfg.MODEL.DEVICE_ID

train_loader, val_loader, num_query, num_classes = make_dataloader(cfg)

if cfg.MODEL.PRETRAIN_CHOICE == 'finetune':
    model = make_model(cfg, num_class=num_classes)
    model.load_param_finetune(cfg.MODEL.PRETRAIN_PATH)
    print('Loading pretrained model for finetuning......')
else:
    model = make_model(cfg, num_class=num_classes)

loss_func, center_criterion = make_loss(cfg, num_classes=num_classes)
optimizer, optimizer_center = make_optimizer(cfg, model, center_criterion)
scheduler = WarmupMultiStepLR(optimizer, cfg.SOLVER.STEPS, cfg.SOLVER.GAMMA,
                              cfg.SOLVER.WARMUP_FACTOR, cfg.SOLVER.WARMUP_EPOCHS,
                              cfg.SOLVER.WARMUP_METHOD)

do_train(
    cfg,
    model,
    center_criterion,
    train_loader,
    val_loader,
    optimizer,
    optimizer_center,
def train(**kwargs):
    opt._parse(kwargs)

    # set random seed and cudnn benchmark
    torch.manual_seed(opt.seed)
    os.makedirs(opt.save_dir, exist_ok=True)
    use_gpu = torch.cuda.is_available()
    sys.stdout = Logger(osp.join(opt.save_dir, 'log_train.txt'))

    print('=========user config==========')
    pprint(opt._state_dict())
    print('============end===============')

    if use_gpu:
        print('currently using GPU')
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(opt.seed)
    else:
        print('currently using cpu')

    print('initializing tx_challenge dataset')
    dataset = Tx_dataset(file_list='train_list_new.txt').dataset
    query_dataset = Tx_dataset(set='train_set', file_list='val_query_list.txt').dataset
    gallery_dataset = Tx_dataset(set='train_set', file_list='val_gallery_list.txt').dataset

    train_set = ImageDataset(dataset, transform=build_transforms(opt, is_train=True))
    pin_memory = True if use_gpu else False

    summary_writer = SummaryWriter(osp.join(opt.save_dir, 'tensorboard_log'))

    if opt.sampler_new:
        trainloader = DataLoader(
            train_set,
            sampler=RandomIdentitySampler_new(train_set, opt.train_batch, opt.num_instances),
            # sampler=RandomIdentitySampler(train_set, opt.num_instances),
            batch_size=opt.train_batch,
            num_workers=opt.workers,
            pin_memory=pin_memory,
            drop_last=True)
    else:
        trainloader = DataLoader(
            train_set,
            # sampler=RandomIdentitySampler_new(train_set, opt.train_batch, opt.num_instances),
            sampler=RandomIdentitySampler(train_set, opt.num_instances),
            batch_size=opt.train_batch,
            num_workers=opt.workers,
            pin_memory=pin_memory,
            drop_last=True)

    queryloader = DataLoader(ImageDataset(query_dataset, transform=build_transforms(opt, is_train=False)),
                             batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory)
    galleryloader = DataLoader(ImageDataset(gallery_dataset, transform=build_transforms(opt, is_train=False)),
                               batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory)
    queryFliploader = DataLoader(ImageDataset(query_dataset, transform=build_transforms(opt, is_train=False, flip=True)),
                                 batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory)
    galleryFliploader = DataLoader(ImageDataset(gallery_dataset, transform=build_transforms(opt, is_train=False, flip=True)),
                                   batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory)
    queryCenterloader = DataLoader(ImageDataset(query_dataset, transform=build_transforms(opt, is_train=False, crop='center')),
                                   batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory)
    galleryCenterloader = DataLoader(ImageDataset(gallery_dataset, transform=build_transforms(opt, is_train=False, crop='center')),
                                     batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory)
    queryLtloader = DataLoader(ImageDataset(query_dataset, transform=build_transforms(opt, is_train=False, crop='lt')),
                               batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory)
    galleryLtloader = DataLoader(ImageDataset(gallery_dataset, transform=build_transforms(opt, is_train=False, crop='lt')),
                                 batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory)
    queryRtloader = DataLoader(ImageDataset(query_dataset, transform=build_transforms(opt, is_train=False, crop='rt')),
                               batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory)
    galleryRtloader = DataLoader(ImageDataset(gallery_dataset, transform=build_transforms(opt, is_train=False, crop='rt')),
                                 batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory)
    queryRbloader = DataLoader(ImageDataset(query_dataset, transform=build_transforms(opt, is_train=False, crop='rb')),
                               batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory)
    galleryRbloader = DataLoader(ImageDataset(gallery_dataset, transform=build_transforms(opt, is_train=False, crop='rb')),
                                 batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory)
    queryLbloader = DataLoader(ImageDataset(query_dataset, transform=build_transforms(opt, is_train=False, crop='lb')),
                               batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory)
    galleryLbloader = DataLoader(ImageDataset(gallery_dataset, transform=build_transforms(opt, is_train=False, crop='lb')),
                                 batch_size=opt.test_batch, num_workers=opt.workers, pin_memory=pin_memory)

    print('initializing model ...')
    model = build_model(opt)
    optim_policy = model.get_optim_policy()

    if opt.pretrained_choice == 'self':
        state_dict = torch.load(opt.pretrained_model)['state_dict']
        # state_dict = {k: v for k, v in state_dict.items()
        #               if not ('reduction' in k or 'softmax' in k)}
        model.load_state_dict(state_dict, False)
        print('load pretrained model ' + opt.pretrained_model)
    print('model size: {:.5f}M'.format(sum(p.numel() for p in model.parameters()) / 1e6))

    if use_gpu:
        model = nn.DataParallel(model).cuda()
    reid_evaluator = Evaluator(model, norm=opt.norm, eval_flip=opt.eval_flip, re_ranking=opt.re_ranking)

    if opt.use_center:
        criterion = make_loss_with_center(opt)
    else:
        criterion = make_loss(opt)

    # get optimizer
    if opt.optim == "sgd":
        optimizer = torch.optim.SGD(optim_policy, lr=opt.lr, momentum=0.9, weight_decay=opt.weight_decay)
    else:
        optimizer = torch.optim.Adam(optim_policy, lr=opt.lr, weight_decay=opt.weight_decay)

    start_epoch = opt.start_epoch
    # get trainer and evaluator
    reid_trainer = cls_tripletTrainer(opt, model, optimizer, criterion, summary_writer)

    # start training
    best_rank1 = opt.best_rank
    best_epoch = 0
    for epoch in range(start_epoch, opt.max_epoch):
        if opt.adjust_lr:
            adjust_lr(optimizer, opt.lr, opt.model_name, epoch + 1)
        reid_trainer.train(epoch, trainloader)

        # skip if not save model
        if opt.eval_step > 0 and (epoch + 1) % opt.eval_step == 0 or (epoch + 1) == opt.max_epoch:
            rank1 = reid_evaluator.validation(queryloader, galleryloader, queryFliploader, galleryFliploader,
                                              queryCenterloader, galleryCenterloader, queryLtloader, galleryLtloader,
                                              queryRtloader, galleryRtloader, queryLbloader, galleryLbloader,
                                              queryRbloader, galleryRbloader)
            print('start re_ranking......')
            _ = reid_evaluator.validation(queryloader, galleryloader, queryFliploader, galleryFliploader,
                                          queryCenterloader, galleryCenterloader, queryLtloader, galleryLtloader,
                                          queryRtloader, galleryRtloader, queryLbloader, galleryLbloader,
                                          queryRbloader, galleryRbloader, re_ranking=True)

            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({'state_dict': state_dict, 'epoch': epoch + 1},
                            is_best=is_best, save_dir=opt.save_dir,
                            filename='checkpoint_ep' + str(epoch + 1) + '.pth.tar')

    print('Best rank-1 {:.1%}, achieved at epoch {}'.format(best_rank1, best_epoch))
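# The evaluator above feeds flipped and cropped copies of every query/gallery
# image. The usual way such test-time augmentation is folded into a single
# descriptor is to average the feature of each view; a hedged sketch of the
# flip case only (the Evaluator's actual fusion rule is not shown in this excerpt):
import torch
import torch.nn.functional as F

@torch.no_grad()
def extract_feat_with_flip(model, images):
    feat = model(images) + model(torch.flip(images, dims=[3]))  # flip along width
    return F.normalize(feat / 2, dim=1)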
import yaml
import torch

if args.config != '':
    with open(args.config, 'r') as f:
        config = yaml.safe_load(f)
    for op in config:
        setattr(args, op, config[op])

torch.backends.cudnn.benchmark = True

# loader = data.Data(args)
ckpt = utility.checkpoint(args)
loader = data_v2.ImageDataManager(args)
model = make_model(args, ckpt)
optimizer = make_optimizer(args, model)
loss = make_loss(args, ckpt) if not args.test_only else None

start = -1
if args.load != '':
    start, model, optimizer = ckpt.resume_from_checkpoint(
        osp.join(ckpt.dir, 'model-latest.pth'), model, optimizer)
    start = start - 1
if args.pre_train != '':
    ckpt.load_pretrained_weights(model, args.pre_train)

scheduler = make_scheduler(args, optimizer, start)

# print('[INFO] System information: \n {}'.format(get_pretty_env_info()))
ckpt.write_log('[INFO] Model parameters: {com[0]} flops: {com[1]}'.format(
    com=compute_model_complexity(model, (1, 3, args.height, args.width))))
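# `ckpt.resume_from_checkpoint` is project-specific. A minimal sketch of the
# usual contract, assuming the checkpoint file is a dict holding 'state_dict',
# 'optimizer' and 'epoch' entries (an assumption, not this repo's format):
import torch

def resume_from_checkpoint_sketch(path, model, optimizer):
    state = torch.load(path, map_location='cpu')
    model.load_state_dict(state['state_dict'])
    optimizer.load_state_dict(state['optimizer'])
    return state['epoch'], model, optimizer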