def __init__(self):
    """Cache run settings and dataset handles, then hand over to ``menu``.

    Reads the module-level ``args`` namespace, builds an ``ImageDataManager``
    from it, and stores the dataset, its attribute list, the positive
    attribute ratio and the evaluation split on the instance before calling
    ``self.menu()``.
    """
    global args
    # Timestamp prefix captured once at construction time.
    self.ts = time.strftime("%Y-%m-%d_%H-%M-%S_")
    self.args = args

    # The first positional argument of the manager is the GPU switch.
    run_on_gpu = not args.use_cpu
    manager_kwargs = image_dataset_kwargs(args)
    self.data_manager = ImageDataManager(run_on_gpu, **manager_kwargs)

    dataset = self.data_manager.dataset
    self.dataset = dataset
    self.attributes = dataset.attributes
    self.positive_label_ratio = dataset.get_positive_attribute_ratio()
    self.split = args.eval_split

    self.menu()
def init_data(self):
    """Create the image data manager and derive loader/attribute state.

    Populates ``self.dm``, ``self.trainloader``, ``self.testloader_dict``,
    ``self.attributes``, ``self.use_bbs`` and ``self.attribute_grouping``
    from ``self.args`` and ``self.use_gpu``.
    """
    print('Initializing image data manager')
    options = self.args
    manager = ImageDataManager(self.use_gpu, **image_dataset_kwargs(options))
    self.dm = manager
    self.trainloader, self.testloader_dict = manager.return_dataloaders()
    self.attributes = manager.attributes
    # Bounding boxes are in use if either ground-truth or feedback boxes are.
    self.use_bbs = options.use_bbs_gt or options.use_bbs_feedback

    if not options.group_atts:
        self.attribute_grouping = None
        return

    # Each group has exactly one positive attribute.
    self.attribute_grouping = manager.dataset.attribute_grouping
    if options.use_raw_acc:
        # Report on the grouped attribute names instead of the flat list.
        self.attributes = manager.dataset.grouped_attribute_names
def main():
    """Export Market-1501 query and gallery outputs as per-identity .mat files.

    Parses the command line into the global ``args``, builds the model on the
    GPU, loads every checkpoint tensor whose name and size match the model,
    runs ``evaluate`` on the ``market1501`` query and gallery loaders, and
    writes each returned entry to
    ``<args.dest>/<query|gallery>/<pid>/512_1024.mat``.

    All output directories are created with ``exist_ok=True`` so the export
    can be re-run into an existing destination; existing .mat files are
    overwritten by ``scipy.io.savemat``.
    """
    global args
    import os
    import os.path as osp

    import scipy.io

    args = get_args()
    use_gpu = True
    model = models.init_model(name=args.arch,
                              num_classes=751,
                              loss={'xent'},
                              use_gpu=args.gpu).cuda()

    # Load only the checkpoint tensors that match the model in name and size.
    checkpoint = torch.load(args.model, map_location={'cuda:0': 'cpu'})
    model_dict = model.state_dict()
    pretrain_dict = {
        k: v
        for k, v in checkpoint['state_dict'].items()
        if k in model_dict and model_dict[k].size() == v.size()
    }
    model_dict.update(pretrain_dict)
    model.load_state_dict(model_dict)

    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    _, testloader_dict = dm.return_dataloaders()
    market_loaders = testloader_dict['market1501']

    # evaluate() is expected to return a mapping pid -> dict accepted by
    # scipy.io.savemat (it is iterated with .items() below).
    exports = (
        ('query', evaluate(model, market_loaders['query'])),
        ('gallery', evaluate(model, market_loaders['gallery'])),
    )

    os.makedirs(args.dest, exist_ok=True)
    for subset, features in exports:
        subset_dir = osp.join(args.dest, subset)
        os.makedirs(subset_dir, exist_ok=True)
        for pid, mapping in features.items():
            pid_dir = osp.join(subset_dir, str(pid))
            # exist_ok=True: a second run must not fail on directories left
            # behind by the first one.
            os.makedirs(pid_dir, exist_ok=True)
            scipy.io.savemat(osp.join(pid_dir, '512_1024.mat'), mapping)
def main():
    """Train or evaluate the re-ID model configured by the global ``args``.

    In order: seeds torch, selects the device, redirects stdout and stderr to
    a ``Logger`` file in ``args.save_dir``, builds the data manager, model,
    criterion, regularizer, optimizer and a ``ReduceLROnPlateau`` scheduler,
    optionally loads pretrained weights and/or resumes from a checkpoint, and
    optionally wraps model and optimizer with apex ``amp``.

    With ``args.evaluate`` set, every target dataset is tested and the
    function returns. Otherwise an optional frozen-base warm-up
    (``args.fixbase_epoch``) is followed by the main training loop, which
    periodically tests, writes ``checkpoint_best.pth.tar`` whenever rank-1
    improves, and writes one ``checkpoint_ep<N>.pth.tar`` per epoch.
    """
    global use_apex
    global args

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stderr = sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing image data manager")
    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              loss={'xent'},
                              use_gpu=use_gpu,
                              args=vars(args))
    print(model)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    if use_gpu:
        print("using gpu")
        model = model.cuda()

    print("criterion===>")
    criterion = get_criterion(dm.num_train_pids, use_gpu, args)
    print(criterion)

    print("regularizer===>")
    regularizer = get_regularizer(vars(args))
    print(regularizer)

    print("optimizer===>")
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    print(optimizer)

    print("scheduler===>")
    # 'max' mode: the monitored quantity (rank-1) should increase.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           'max',
                                                           factor=0.1,
                                                           patience=5,
                                                           verbose=True)
    print(scheduler)

    if args.load_weights and check_isfile(args.load_weights):
        # load pretrained weights but ignore layers that don't match in size
        try:
            checkpoint = torch.load(args.load_weights)
        except Exception as e:
            # Best-effort fallback: retry while remapping cuda:0 tensors to
            # the CPU.
            print(e)
            checkpoint = torch.load(args.load_weights,
                                    map_location={'cuda:0': 'cpu'})

        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {
            k: v
            for k, v in pretrain_dict.items()
            if k in model_dict and model_dict[k].size() == v.size()
        }
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    # Best rank-1 seen so far; restored from the checkpoint when resuming.
    max_r1 = 0

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        state = model.state_dict()
        state.update(checkpoint['state_dict'])
        model.load_state_dict(state)
        optimizer.load_state_dict(checkpoint['optimizer'])
        args.start_epoch = checkpoint['epoch'] + 1
        max_r1 = checkpoint['rank1']
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      checkpoint['rank1']))

    if use_apex:
        print("using apex")
        model, optimizer = amp.initialize(model, optimizer, opt_level="O0")

    if args.evaluate:
        print("Evaluate only")

        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            # Each loader is passed as an (original, flipped) pair.
            queryloader = (testloader_dict[name]['query'],
                           testloader_dict[name]['query_flip'])
            galleryloader = (testloader_dict[name]['gallery'],
                             testloader_dict[name]['gallery_flip'])
            distmat = test(model,
                           queryloader,
                           galleryloader,
                           use_gpu,
                           return_distmat=True)

            if args.visualize_ranks:
                visualize_ranked_results(
                    distmat,
                    dm.return_testdataset_by_name(name),
                    save_dir=osp.join(args.save_dir, 'ranked_results', name),
                    topk=20)
        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    print("==> Start training")

    if args.fixbase_epoch > 0:
        # The 'sa' environment variable is cleared for the warm-up and
        # restored afterwards.
        oldenv = os.environ.get('sa', '')
        os.environ['sa'] = ''
        print(
            "Train {} for {} epochs while keeping other layers frozen".format(
                args.open_layers, args.fixbase_epoch))
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch,
                  model,
                  criterion,
                  regularizer,
                  optimizer,
                  trainloader,
                  use_gpu,
                  fixbase=True)
            train_time += round(time.time() - start_train_time)

        print("Done. All layers are open to train for {} epochs".format(
            args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)
        os.environ['sa'] = oldenv

    for epoch in range(args.start_epoch, args.max_epoch):
        auto_reset_learning_rate(optimizer, args)

        print(
            f"===========================start epoch {epoch + 1} {now()}==========================================="
        )
        print(f"lr:{optimizer.param_groups[0]['lr']}")

        # Restart the timer every epoch. Previously it was only set inside
        # the warm-up loop, which raised NameError without a warm-up and
        # otherwise measured from a stale start time.
        start_train_time = time.time()
        train(epoch,
              model,
              criterion,
              regularizer,
              optimizer,
              trainloader,
              use_gpu,
              fixbase=False)
        train_time += round(time.time() - start_train_time)

        state_dict = model.state_dict()

        # Stays 0 on epochs where no evaluation is run.
        rank1 = 0

        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("==> Test")

            for name in args.target_names:
                print("Evaluating {} ...".format(name))
                queryloader = (testloader_dict[name]['query'],
                               testloader_dict[name]['query_flip'])
                galleryloader = (testloader_dict[name]['gallery'],
                                 testloader_dict[name]['gallery_flip'])
                rank1 = test(model, queryloader, galleryloader, use_gpu)
                ranklogger.write(name, epoch + 1, rank1)

        if max_r1 < rank1:
            print('Save!', max_r1, rank1)
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                    'optimizer': optimizer.state_dict(),
                }, False, osp.join(args.save_dir, 'checkpoint_best.pth.tar'))
            max_r1 = rank1

        save_checkpoint(
            {
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
                'optimizer': optimizer.state_dict(),
            }, False,
            osp.join(args.save_dir,
                     'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

        # NOTE(review): the scheduler is stepped with rank1 == 0 on epochs
        # without evaluation, which the plateau scheduler may count as
        # non-improving epochs. Confirm this is intended.
        scheduler.step(rank1)

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
        .format(elapsed, train_time))
    ranklogger.show_summary()
def main():
    """Train or evaluate an image re-ID model with softmax and triplet losses.

    Driven entirely by the global ``args``: seeds torch, picks the device,
    redirects stdout to a ``Logger`` file, builds data loaders, model, both
    criteria, optimizer and a ``MultiStepLR`` scheduler, optionally loads
    weights or resumes, then either evaluates every target dataset and
    returns, or runs the optional frozen-base warm-up followed by the main
    loop with periodic testing and checkpointing.
    """
    global args

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    cuda_available = torch.cuda.is_available()
    # An explicit CPU request always wins over CUDA availability.
    use_gpu = False if args.use_cpu else cuda_available

    log_name = 'log_train.txt'
    if args.evaluate:
        log_name = 'log_test.txt'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if not use_gpu:
        print("Currently using CPU, however, GPU is highly recommended")
    else:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)

    print("Initializing image data manager")
    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              loss={'xent', 'htri'})
    print("Model size: {:.3f} M".format(count_num_param(model)))

    criterion_xent = CrossEntropyLoss(num_classes=dm.num_train_pids,
                                      use_gpu=use_gpu,
                                      label_smooth=args.label_smooth)
    criterion_htri = TripletLoss(margin=args.margin)
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)

    if args.load_weights and check_isfile(args.load_weights):
        # Keep only checkpoint tensors whose name and size match the model.
        checkpoint = torch.load(args.load_weights)
        current_state = model.state_dict()
        compatible = {}
        for key, tensor in checkpoint['state_dict'].items():
            if key in current_state and current_state[key].size() == tensor.size():
                compatible[key] = tensor
        current_state.update(compatible)
        model.load_state_dict(current_state)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch'] + 1
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      checkpoint['rank1']))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            loaders = testloader_dict[name]
            queryloader = loaders['query']
            galleryloader = loaders['gallery']
            distmat = test(model, queryloader, galleryloader, use_gpu,
                           return_distmat=True)
            if args.visualize_ranks:
                ranked_dir = osp.join(args.save_dir, 'ranked_results', name)
                visualize_ranked_results(distmat,
                                         dm.return_testdataset_by_name(name),
                                         save_dir=ranked_dir,
                                         topk=20)
        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    print("=> Start training")

    if args.fixbase_epoch > 0:
        print("Train {} for {} epochs while keeping other layers frozen".format(
            args.open_layers, args.fixbase_epoch))
        # Snapshot the optimizer so the warm-up leaves no state behind.
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            epoch_started = time.time()
            train(epoch, model, criterion_xent, criterion_htri, optimizer,
                  trainloader, use_gpu, fixbase=True)
            train_time += round(time.time() - epoch_started)

        print("Done. All layers are open to train for {} epochs".format(
            args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)

    for epoch in range(args.start_epoch, args.max_epoch):
        epoch_started = time.time()
        train(epoch, model, criterion_xent, criterion_htri, optimizer,
              trainloader, use_gpu)
        train_time += round(time.time() - epoch_started)

        scheduler.step()

        epoch_number = epoch + 1
        periodic_eval = (epoch_number > args.start_eval
                         and args.eval_freq > 0
                         and epoch_number % args.eval_freq == 0)
        # The last epoch is always evaluated and checkpointed.
        if not (periodic_eval or epoch_number == args.max_epoch):
            continue

        print("=> Test")
        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            loaders = testloader_dict[name]
            queryloader = loaders['query']
            galleryloader = loaders['gallery']
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            ranklogger.write(name, epoch_number, rank1)

        # Save the wrapped module's weights when DataParallel is in use.
        state_dict = model.module.state_dict() if use_gpu else model.state_dict()
        checkpoint_path = osp.join(
            args.save_dir, 'checkpoint_ep' + str(epoch_number) + '.pth.tar')
        save_checkpoint({
            'state_dict': state_dict,
            'rank1': rank1,
            'epoch': epoch,
        }, False, checkpoint_path)

    elapsed = str(datetime.timedelta(seconds=round(time.time() - start_time)))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))
    ranklogger.show_summary()
def __init__(self, args):
    """Load a stored result dict, print metrics and produce requested plots.

    Reads ``result_dict.pickle`` from ``args.save_experiment``, prints
    per-attribute accuracy and hardness-prediction average precision for the
    ``args.eval_split`` split, and then, depending on the ``plot_*``,
    ``show_example_imgs`` and ``num_save_*`` flags, renders the matching
    plots or image grids into ``args.save_experiment``.

    Args:
        args: Parsed command-line namespace; stored as ``self.args``.
    """
    self.args = args

    # Decide which processor (CPU or GPU) to use.
    if not args.use_avai_gpus:
        os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    # Timestamp prefix shared by every file written during this run.
    ts = time.strftime("%Y-%m-%d_%H-%M-%S_")

    # NOTE: pickle.load can execute arbitrary code; only point
    # args.save_experiment at result files you trust.
    with open(osp.join(args.save_experiment, "result_dict.pickle"),
              'rb') as handle:
        result_dict = pickle.load(handle)
    self.result_dict = result_dict
    self.result_manager = ResultManager(self.result_dict)
    print("Loaded result dict with keys: ")
    print(sorted(self.result_dict.keys()))

    split = self.args.eval_split
    # No datapoints are excluded; kept as a switch for the metric helpers.
    ignored_test_datapoints = None
    self.result_manager.print_stored()

    labels, prediction_probs, predictions, hp_scores = \
        self.result_manager.get_outputs(split)
    # Train-split outputs are not loaded, so no train hardness scores exist.
    # Binding the name explicitly avoids the NameError the confidence and
    # histogram paths used to hit.
    # NOTE(review): confirm plot_hardness_score_distribution accepts None
    # for the train scores, as it is given for missing test scores.
    hp_scores_train = None
    _, prediction_probs_val, _, hp_scores_val = \
        self.result_manager.get_outputs("val")
    if self.result_manager.check_output_dict("test"):
        _, prediction_probs_test, _, hp_scores_test = \
            self.result_manager.get_outputs("test")
    else:
        prediction_probs_test, hp_scores_test = None, None

    loaded_args = result_dict["args"]
    f1_calibration_thresholds = result_dict["f1_thresholds"]
    attributes = result_dict["attributes"]
    positivity_ratio = result_dict["positivity_ratio"]

    if self.args.use_confidence:
        # Replace the stored hardness scores with (1 - confidence).
        if self.args.f1_calib:
            decision_thresholds = f1_calibration_thresholds
        else:
            decision_thresholds = None
        hp_scores = 1 - metrics.get_confidence(prediction_probs,
                                               decision_thresholds)
        if hp_scores_test is not None:
            hp_scores_test = 1 - metrics.get_confidence(
                prediction_probs_test, decision_thresholds)
        hp_scores_val = 1 - metrics.get_confidence(prediction_probs_val,
                                                   decision_thresholds)
        print("Using confidence scores as HP-scores. ")

    # NOTE(review): reconstructed as unconditional re-thresholding of the
    # stored probabilities; confirm it was not meant to depend on
    # use_confidence.
    if args.f1_calib:
        predictions = prediction_probs > f1_calibration_thresholds
    else:
        predictions = prediction_probs > 0.5

    if ignored_test_datapoints is not None:
        print("Ignoring the {:.0%} hardest of testing examples. ".format(
            ignored_test_datapoints.mean()))

    acc_atts = metrics.mean_attribute_accuracies(
        predictions, labels, ignore=ignored_test_datapoints)
    average_precision = metrics.hp_average_precision(
        labels, predictions, hp_scores)

    print('Results ----------')
    print(
        metrics.get_metrics_table(predictions,
                                  labels,
                                  ignore=ignored_test_datapoints))
    print('------------------')
    print('Mean Attribute Accuracies:')

    header = [
        "Attribute", "Accuracy", "Positivity Ratio", "Average Precision"
    ]
    table = tab.tabulate(zip(attributes, acc_atts, positivity_ratio,
                             average_precision),
                         floatfmt='.2%',
                         headers=header)
    print(table)
    print(
        "Mean over all attributes of mean attribute accuracy of label prediction: {:.2%}"
        .format(acc_atts.mean()))
    print(
        "Mean average precision of hardness prediction over all attributes: {:.2%}"
        .format(average_precision.mean()))
    print('------------------')

    if args.plot_acc_hp or args.plot_hp_hist or args.plot_pos_hp or args.num_save_hard + args.num_save_easy > 0:
        # Restrict labels, predictions and scores to the selected attributes.
        selected_attributes = args.select_atts
        print("Analyzing attributes: " + str(selected_attributes))
        att_idxs = [attributes.index(att) for att in selected_attributes]
        hard_att_labels = labels[:, att_idxs]
        hard_att_pred = predictions[:, att_idxs]
        if not loaded_args.hp_net_simple:
            # Hardness scores are indexed by attribute in this case, so pick
            # the columns of the selected attributes.
            hp_scores = hp_scores[:, att_idxs]
            hp_scores_val = hp_scores_val[:, att_idxs]
            if hp_scores_test is not None:
                hp_scores_test = hp_scores_test[:, att_idxs]

    if args.plot_acc_hp:
        filename = osp.join(args.save_experiment,
                            ts + "accuracy-over-hardness")
        plot.show_accuracy_over_hardness(filename,
                                         selected_attributes,
                                         hard_att_labels,
                                         hard_att_pred,
                                         hp_scores,
                                         metric=args.plot_metric,
                                         save_plot=self.args.save_plot)

    if args.plot_pos_hp:
        filename = osp.join(args.save_experiment,
                            ts + "positivity-over-hardness")
        plot.show_positivity_over_hardness(filename,
                                           selected_attributes,
                                           hard_att_labels,
                                           hard_att_pred,
                                           hp_scores,
                                           save_plot=self.args.save_plot)

    if args.plot_pos_atts:
        filename = osp.join(args.save_experiment, ts + "positivity-ratio")
        plot.plot_positivity_ratio_over_attributes(
            attributes,
            positivity_ratio,
            filename,
            save_plot=self.args.save_plot)

    if args.plot_hp_hist:
        filename = osp.join(args.save_experiment,
                            ts + "hardness-score-distribution")
        # The 'confidnece' keyword spelling is kept as-is because it must
        # match the parameter name of the plotting helper.
        plot.plot_hardness_score_distribution(
            filename,
            selected_attributes,
            hp_scores_train,
            hp_scores_val,
            hp_scores_test,
            args.plot_x_max,
            save_plot=self.args.save_plot,
            confidnece=self.args.use_confidence)

    if args.num_save_hard + args.num_save_easy > 0 or args.show_example_imgs:
        # The image data is only needed for the example and hard-image grids.
        print('Initializing image data manager')
        dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))

    if args.show_example_imgs:
        filename = osp.join(args.save_experiment, ts + "example_images.png")
        plot.show_example_imgs(dm.dataset,
                               filename,
                               save_plot=self.args.save_plot)

    if args.num_save_hard + args.num_save_easy > 0:
        assert len(self.args.select_atts) == 1
        # This part only gets executed if the corresponding arguments are
        # passed at the terminal.
        hp_scores = hp_scores.flatten()
        hard_att_labels = hard_att_labels.flatten()
        # Ascending hardness: easiest first, hardest last.
        sorted_idxs = hp_scores.argsort()

        if args.show_pos_samples:
            # NOTE(review): assumes hard_att_labels is a boolean mask.
            sorted_idxs = sorted_idxs[hard_att_labels[sorted_idxs]]
        elif args.show_neg_samples:
            sorted_idxs = sorted_idxs[np.logical_not(
                hard_att_labels[sorted_idxs])]

        # Sample from the 3x larger easiest (or hardest) pool so repeated
        # runs show different examples.
        if args.num_save_easy > 0:
            hard_idxs = sorted_idxs[0:args.num_save_easy * 3]
            hard_idxs = np.random.choice(hard_idxs,
                                         args.num_save_easy,
                                         replace=False)
        else:
            hard_idxs = sorted_idxs[-args.num_save_hard * 3:]
            hard_idxs = np.random.choice(hard_idxs,
                                         args.num_save_hard,
                                         replace=False)

        filename = osp.join(args.save_experiment, ts + "hard_images.png")
        # Display the image examples.
        plot.show_img_grid(dm.split_dict[split],
                           hard_idxs,
                           filename,
                           hp_scores[hard_idxs],
                           save_plot=self.args.save_plot)
def main():
    """Train or evaluate the ReID-stream model described by the global ``args``.

    Seeds torch, selects the device, redirects stdout to a ``Logger`` file,
    builds the data loaders, the model (with ``reid_dim``), cross-entropy and
    triplet criteria, optimizer and ``MultiStepLR`` scheduler. In
    evaluate-only mode each target dataset is tested and the function
    returns; otherwise the training loop runs with periodic testing and
    checkpointing.
    """
    global args

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    cuda_available = torch.cuda.is_available()
    # A CPU request overrides whatever CUDA reports.
    use_gpu = False if args.use_cpu else cuda_available

    log_name = 'log_train.txt'
    if args.evaluate:
        log_name = 'log_test.txt'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if not use_gpu:
        print("Currently using CPU, however, GPU is highly recommended")
    else:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)

    print("Initializing image data manager")
    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    # Model and losses.
    print("Initializing ReID-Stream: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              reid_dim=args.reid_dim,
                              loss={'xent', 'htri'})
    print("ReID Model size: {:.3f} M".format(count_num_param(model)))

    criterion_xent = CrossEntropyLoss(num_classes=dm.num_train_pids,
                                      use_gpu=use_gpu,
                                      label_smooth=args.label_smooth)
    criterion_htri = TripletLoss(margin=args.margin)

    # Optimizer and learning-rate schedule.
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            loaders = testloader_dict[name]
            queryloader = loaders['query']
            galleryloader = loaders['gallery']
            distmat = test(model, queryloader, galleryloader, use_gpu,
                           return_distmat=True)
            if args.visualize_ranks:
                ranked_dir = osp.join(args.save_dir, 'ranked_results', name)
                visualize_ranked_results(distmat,
                                         dm.return_testdataset_by_name(name),
                                         save_dir=ranked_dir,
                                         topk=20)
        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    print("==> Start training")

    for epoch in range(args.start_epoch, args.max_epoch):
        epoch_started = time.time()
        train(epoch, model, criterion_xent, criterion_htri,
              optimizer, trainloader, use_gpu)
        train_time += round(time.time() - epoch_started)

        scheduler.step()

        epoch_number = epoch + 1
        periodic_eval = (epoch_number > args.start_eval
                         and args.eval_freq > 0
                         and epoch_number % args.eval_freq == 0)
        # Testing and checkpointing also always happen after the last epoch.
        if not (periodic_eval or epoch_number == args.max_epoch):
            continue

        print("==> Test")
        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            loaders = testloader_dict[name]
            queryloader = loaders['query']
            galleryloader = loaders['gallery']
            rank1 = test(model, queryloader, galleryloader, use_gpu)
            ranklogger.write(name, epoch_number, rank1)

        # With DataParallel the real network lives under ``.module``.
        state_dict = model.module.state_dict() if use_gpu else model.state_dict()
        checkpoint_path = osp.join(
            args.save_dir, 'checkpoint_ep' + str(epoch_number) + '.pth.tar')
        save_checkpoint(
            {
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
            }, False, checkpoint_path)

    elapsed = str(datetime.timedelta(seconds=round(time.time() - start_time)))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
        .format(elapsed, train_time))
    ranklogger.show_summary()
def main():
    """Train or evaluate a re-ID / face model with TensorBoard logging.

    Driven by the global ``args``. Seeds torch, selects the device, redirects
    stdout to a timestamped ``Logger`` file, builds the data manager and the
    model, and optionally loads weights or resumes from a checkpoint.

    In evaluate-only mode each target is tested (targets whose name contains
    ``lfw`` go through ``evaluate`` on ``dm.lfw_dataset``) and the function
    returns. Otherwise losses, optimizer and a ``MultiStepLR`` scheduler are
    created, an optional frozen-base warm-up runs, and the main loop trains,
    periodically measures train and test quality, writes scalars to the
    summary writers and saves checkpoints.

    Raises:
        AssertionError: If ``choose_losses`` returns no loss functions.
    """
    global args

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    log_name = 'log_train_{}.txt'.format(time.strftime("%Y-%m-%d-%H-%M-%S"))
    if args.evaluate:
        # str.replace returns a new string; the result used to be discarded,
        # so evaluation runs were logged under a "train" file name.
        log_name = log_name.replace('train', 'test')
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print(' '.join(sys.argv))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing image data manager")
    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    if hasattr(dm, 'lfw_dataset'):
        lfw = dm.lfw_dataset
        print('LFW dataset is used!')
    else:
        lfw = None
    trainloader, trainloader_dict, testloader_dict = dm.return_dataloaders()

    num_train_pids = dm.num_train_pids

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(
        name=args.arch,
        num_classes=num_train_pids,
        loss={'xent', 'htri'},
        # ImageNet initialisation is skipped when explicit weights are given.
        pretrained=False if args.load_weights else 'imagenet',
        grayscale=args.grayscale,
        normalize_embeddings=args.normalize_embeddings,
        normalize_fc=args.normalize_fc,
        convbn=args.convbn)
    print("Model size: {:.3f} M".format(count_num_param(model)))
    count_flops(model, args.height, args.width, args.grayscale)

    if args.load_weights and check_isfile(args.load_weights):
        # load pretrained weights but ignore layers that don't match in size
        load_weights(model, args.load_weights)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch'] + 1
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      checkpoint['rank1']))

    if use_gpu:
        # A second, redundant .cuda() call on the wrapped model was dropped.
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")

        for name in args.target_names:
            if 'lfw' not in name.lower():
                print("Evaluating {} ...".format(name))
                queryloader = testloader_dict[name]['query']
                galleryloader = testloader_dict[name]['gallery']
                distmat = test(args,
                               model,
                               queryloader,
                               galleryloader,
                               use_gpu,
                               return_distmat=True)

                if args.visualize_ranks:
                    visualize_ranked_results(
                        distmat,
                        dm.return_testdataset_by_name(name),
                        save_dir=osp.join(args.save_dir, 'ranked_results',
                                          name),
                        topk=20)
            else:
                model.eval()
                same_acc, diff_acc, all_acc, auc, thresh = evaluate(
                    args,
                    dm.lfw_dataset,
                    model,
                    compute_embeddings_lfw,
                    args.test_batch_size,
                    verbose=False,
                    show_failed=args.show_failed)
                log.info('Validation accuracy: {0:.4f}, {1:.4f}'.format(
                    same_acc, diff_acc))
                log.info('Validation accuracy mean: {0:.4f}'.format(all_acc))
                log.info('Validation AUC: {0:.4f}'.format(auc))
                log.info('Estimated threshold: {0:.4f}'.format(thresh))
        return

    criterions = choose_losses(args, dm, model, use_gpu)

    if not args.evaluate and len(criterions) == 0:
        raise AssertionError('No loss functions were chosen!')
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))

    if args.load_optim:
        checkpoint = torch.load(args.load_weights)
        optimizer.load_state_dict(checkpoint['optim'])
        print("Loaded optimizer from '{}'".format(args.load_weights))

        # NOTE(review): reconstructed as part of the load_optim branch, so
        # the command-line lr / weight decay override the values restored
        # from the checkpoint. Confirm the intended nesting.
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr
            param_group['weight_decay'] = args.weight_decay

    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    train_writer = SummaryWriter(osp.join(args.save_dir, 'train_log'))
    test_writer = SummaryWriter(osp.join(args.save_dir, 'test_log'))
    print("=> Start training")

    if args.fixbase_epoch > 0:
        print(
            "Train {} for {} epochs while keeping other layers frozen".format(
                args.open_layers, args.fixbase_epoch))
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch,
                  model,
                  criterions,
                  optimizer,
                  trainloader,
                  use_gpu,
                  train_writer,
                  fixbase=True,
                  lfw=lfw)
            train_time += round(time.time() - start_train_time)

        # NOTE(review): reconstructed as a single evaluation after the
        # warm-up; the collapsed source does not show whether it ran once or
        # after every warm-up epoch.
        for name in args.target_names:
            if 'lfw' not in name.lower():
                print("Evaluating {} ...".format(name))
                queryloader = testloader_dict[name]['query']
                galleryloader = testloader_dict[name]['gallery']
                testloader = testloader_dict[name]['test']
                # No validation losses are computed during warm-up.
                criteria = None
                rank1 = test(args,
                             model,
                             queryloader,
                             galleryloader,
                             use_gpu,
                             testloader=testloader,
                             criterions=criteria)
            else:
                model.eval()
                same_acc, diff_acc, all_acc, auc, thresh = evaluate(
                    args,
                    dm.lfw_dataset,
                    model,
                    compute_embeddings_lfw,
                    args.test_batch_size,
                    verbose=False,
                    show_failed=args.show_failed)
                print('Validation accuracy: {0:.4f}, {1:.4f}'.format(
                    same_acc, diff_acc))
                print('Validation accuracy mean: {0:.4f}'.format(all_acc))
                print('Validation AUC: {0:.4f}'.format(auc))
                print('Estimated threshold: {0:.4f}'.format(thresh))
                rank1 = all_acc

        print("Done. All layers are open to train for {} epochs".format(
            args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)

    for epoch in range(args.start_epoch, args.max_epoch):
        for criterion in criterions:
            criterion.train_stats.reset()

        start_train_time = time.time()
        train(epoch,
              model,
              criterions,
              optimizer,
              trainloader,
              use_gpu,
              train_writer,
              lfw=lfw)
        train_time += round(time.time() - start_train_time)

        scheduler.step()

        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            # Global step used as the x-axis for all scalars of this epoch.
            num_iter = (epoch + 1) * len(trainloader)

            if not args.no_train_quality:
                for name in args.source_names:
                    print(
                        "Measure quality on the {} train set...".format(name))
                    queryloader = trainloader_dict[name]['query']
                    galleryloader = trainloader_dict[name]['gallery']
                    rank1 = test(args, model, queryloader, galleryloader,
                                 use_gpu)
                    train_writer.add_scalar('rank1/{}'.format(name), rank1,
                                            num_iter)

            print("=> Test")

            for name in args.target_names:
                if 'lfw' not in name.lower():
                    print("Evaluating {} ...".format(name))
                    queryloader = testloader_dict[name]['query']
                    galleryloader = testloader_dict[name]['gallery']
                    testloader = testloader_dict[name]['test']
                    criteria = criterions
                    if args.no_loss_on_val:
                        criteria = None
                    rank1 = test(args,
                                 model,
                                 queryloader,
                                 galleryloader,
                                 use_gpu,
                                 testloader=testloader,
                                 criterions=criteria)
                    test_writer.add_scalar('rank1/{}'.format(name), rank1,
                                           num_iter)
                    if not args.no_loss_on_val:
                        for criterion in criterions:
                            test_writer.add_scalar(
                                'loss/{}'.format(criterion.name),
                                criterion.test_stats.avg, num_iter)
                            criterion.test_stats.reset()
                    ranklogger.write(name, epoch + 1, rank1)
                else:
                    model.eval()
                    same_acc, diff_acc, all_acc, auc, thresh = evaluate(
                        args,
                        dm.lfw_dataset,
                        model,
                        compute_embeddings_lfw,
                        args.test_batch_size,
                        verbose=False,
                        show_failed=args.show_failed)
                    print('Validation accuracy: {0:.4f}, {1:.4f}'.format(
                        same_acc, diff_acc))
                    print('Validation accuracy mean: {0:.4f}'.format(all_acc))
                    print('Validation AUC: {0:.4f}'.format(auc))
                    print('Estimated threshold: {0:.4f}'.format(thresh))
                    test_writer.add_scalar('Accuracy/Val_same_accuracy',
                                           same_acc, num_iter)
                    test_writer.add_scalar('Accuracy/Val_diff_accuracy',
                                           diff_acc, num_iter)
                    test_writer.add_scalar('Accuracy/Val_accuracy', all_acc,
                                           num_iter)
                    test_writer.add_scalar('Accuracy/AUC', auc, num_iter)
                    rank1 = all_acc

            # Save the wrapped module's weights when DataParallel is in use.
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            save_dict = {
                'state_dict': state_dict,
                'epoch': epoch,
                'optim': optimizer.state_dict()
            }

            if len(args.target_names):
                save_dict['rank1'] = rank1

            save_checkpoint(
                save_dict, False,
                osp.join(args.save_dir,
                         'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}."
        .format(elapsed, train_time))
    ranklogger.show_summary()
def main():
    """Train or evaluate a model with a cross-entropy criterion.

    Every setting is read from the module-level ``args``.  The function:

    1. calls ``set_random_seed``, picks CPU or GPU, and redirects
       ``sys.stdout`` to a ``Logger`` file inside ``args.save_dir``;
    2. builds the data loaders, model, criterion, optimizer and LR scheduler,
       optionally loading pretrained weights and/or resuming a checkpoint;
    3. when ``args.evaluate`` is set, runs ``test`` on every target dataset
       (optionally saving ranked-result visualisations) and returns;
    4. otherwise runs the optional "fixbase" epochs followed by the main
       training loop, evaluating and saving a checkpoint on the configured
       schedule and always after the last epoch.
    """
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print('==========\nArgs:{}\n=========='.format(args))

    if use_gpu:
        print('Currently using GPU {}'.format(args.gpu_devices))
        cudnn.benchmark = True
    else:
        warnings.warn(
            'Currently using CPU, however, GPU is highly recommended')

    print('Initializing image data manager')
    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print('Initializing model: {}'.format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              loss={'xent'},
                              pretrained=not args.no_pretrained,
                              use_gpu=use_gpu)
    print('Model size: {:.3f} M'.format(count_num_param(model)))

    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)

    model = nn.DataParallel(model).cuda() if use_gpu else model

    criterion = CrossEntropyLoss(num_classes=dm.num_train_pids,
                                 use_gpu=use_gpu,
                                 label_smooth=args.label_smooth)
    optimizer = init_optimizer(model, **optimizer_kwargs(args))
    scheduler = init_lr_scheduler(optimizer, **lr_scheduler_kwargs(args))

    if args.resume and check_isfile(args.resume):
        args.start_epoch = resume_from_checkpoint(args.resume,
                                                  model,
                                                  optimizer=optimizer)

    if args.evaluate:
        print('Evaluate only')

        for name in args.target_names:
            print('Evaluating {} ...'.format(name))
            queryloader = testloader_dict[name]['query']
            galleryloader = testloader_dict[name]['gallery']
            distmat = test(model,
                           queryloader,
                           galleryloader,
                           use_gpu,
                           return_distmat=True)

            if args.visualize_ranks:
                visualize_ranked_results(
                    distmat,
                    dm.return_testdataset_by_name(name),
                    save_dir=osp.join(args.save_dir, 'ranked_results', name),
                    topk=20)
        return

    time_start = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    print('=> Start training')

    if args.fixbase_epoch > 0:
        print(
            'Train {} for {} epochs while keeping other layers frozen'.format(
                args.open_layers, args.fixbase_epoch))
        # Snapshot taken before the fixbase epochs and loaded back afterwards.
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            train(epoch,
                  model,
                  criterion,
                  optimizer,
                  trainloader,
                  use_gpu,
                  fixbase=True)

        print('Done. All layers are open to train for {} epochs'.format(
            args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)

    for epoch in range(args.start_epoch, args.max_epoch):
        train(epoch, model, criterion, optimizer, trainloader, use_gpu)

        scheduler.step()

        # Same precedence as the unparenthesised original: the periodic
        # schedule OR "this is the final epoch".
        scheduled_eval = ((epoch + 1) > args.start_eval
                          and args.eval_freq > 0
                          and (epoch + 1) % args.eval_freq == 0)
        is_last_epoch = (epoch + 1) == args.max_epoch

        if scheduled_eval or is_last_epoch:
            print('=> Test')

            # Placeholder so the checkpoint can still be written when there
            # are no target datasets; previously `rank1` was unbound in that
            # case and the save below raised UnboundLocalError.
            rank1 = 0
            for name in args.target_names:
                print('Evaluating {} ...'.format(name))
                queryloader = testloader_dict[name]['query']
                galleryloader = testloader_dict[name]['gallery']
                rank1 = test(model, queryloader, galleryloader, use_gpu)
                ranklogger.write(name, epoch + 1, rank1)

            # `rank1` is the value for the last evaluated target dataset.
            save_checkpoint(
                {
                    'state_dict': model.state_dict(),
                    'rank1': rank1,
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'optimizer': optimizer.state_dict(),
                }, args.save_dir)

    elapsed = round(time.time() - time_start)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print('Elapsed {}'.format(elapsed))
    ranklogger.show_summary()
def main():
    """Evaluate a model, optionally quantizing it first.

    Settings are read from the module-level ``args``.  The function seeds
    torch, selects CPU/GPU, redirects ``sys.stdout`` to ``log_test.txt`` in
    ``args.save_dir``, builds the model, optionally loads weights, absorbs
    batch-norm layers and/or quantizes the model with ``gap_quantization``,
    and finally, when ``args.evaluate`` is set, evaluates every target
    dataset (LFW-style targets go through ``evaluate``, all others through
    ``test``).

    Raises:
        AttributeError: if quantization is requested without
            ``args.quant_data_dir``, or ``args.infer`` is set with an empty
            ``args.image_path``.
    """
    global args

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    # This entry point always logs to the test log.
    log_name = 'log_test.txt'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing image data manager")
    # NOTE(review): `dm` and `testloader_dict` are only bound when
    # `convert_to_onnx` is off, but the quantization and evaluation paths
    # below read them unconditionally - confirm those options are never
    # combined with `convert_to_onnx`.
    if not args.convert_to_onnx:  # and not args.infer:
        dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
        trainloader, trainloader_dict, testloader_dict = dm.return_dataloaders()

    # Fixed class count passed to the model constructor; it is not derived
    # from the dataset here.
    num_train_pids = 100

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(
        name=args.arch,
        num_classes=num_train_pids,
        loss={'xent', 'htri'},
        pretrained=False if args.load_weights else 'imagenet',
        grayscale=args.grayscale,
        ceil_mode=not args.convert_to_onnx,
        infer=True,
        bits=args.bits,
        normalize_embeddings=args.normalize_embeddings,
        normalize_fc=args.normalize_fc,
        convbn=args.convbn)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    if args.load_weights and check_isfile(args.load_weights):
        # load pretrained weights but ignore layers that don't match in size
        load_weights(model, args.load_weights)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.absorb_bn:
        search_absorbed_bn(model)

    if args.quantization or args.save_quantized_model:
        # Imported lazily so gap_quantization is only needed when quantizing.
        from gap_quantization.quantization import ModelQuantizer
        from gap_quantization.dump_utils import dump_quant_params, remove_extra_dump, remove_cat_files

        if args.quant_data_dir is None:
            raise AttributeError('quant-data-dir argument is required.')

        num_channels = 1 if args.grayscale else 3
        cfg = {
            "bits": args.bits,  # number of bits to store weights and activations
            "accum_bits": 32,  # number of bits to store intermediate convolution result
            "signed": True,  # use signed numbers
            "save_folder": args.save_dir,  # folder to save results
            "data_source": args.quant_data_dir,  # folder with images to collect dataset statistics
            "use_gpu": False,  # use GPU for inference
            "batch_size": 1,
            "num_workers": 0,  # number of workers for PyTorch dataloader
            "verbose": True,
            "save_params": args.save_quantized_model,  # save quantization parameters to the file
            "quantize_forward": True,  # replace usual convs, poolings, ... with GAP-like ones
            "num_input_channels": num_channels,
            "raw_input": args.no_normalize,
            "double_precision": args.double_precision  # use double precision convolutions
        }

        # The quantizer is configured with "use_gpu": False, so the model is
        # moved to the CPU first.
        model = model.cpu()
        quantizer = ModelQuantizer(
            model, cfg, dm.transform_test
        )  # transform test is OK if we use args.no_normalize
        quantizer.quantize_model(
        )  # otherwise we need to add QuantizeInput operation

        if args.infer:
            if args.image_path == '':
                raise AttributeError('Image for inference is required')

            # Run one image through the quantized model and dump activations.
            quantizer.dump_activations(args.image_path,
                                       dm.transform_test,
                                       save_dir=os.path.join(
                                           args.save_dir, 'activations_dump'))
            dump_quant_params(args.save_dir, args.convbn)
            if args.convbn:
                remove_extra_dump(
                    os.path.join(args.save_dir, 'activations_dump'))
            remove_cat_files(args.save_dir)

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")

        for name in args.target_names:
            if not 'lfw' in name.lower():
                # Query/gallery evaluation through `test`.
                print("Evaluating {} ...".format(name))
                queryloader = testloader_dict[name]['query']
                galleryloader = testloader_dict[name]['gallery']
                distmat = test(args,
                               model,
                               queryloader,
                               galleryloader,
                               use_gpu,
                               return_distmat=True)

                if args.visualize_ranks:
                    visualize_ranked_results(
                        distmat,
                        dm.return_testdataset_by_name(name),
                        save_dir=osp.join(args.save_dir, 'ranked_results',
                                          name),
                        topk=20)
            else:
                # Targets whose name contains "lfw" are scored with
                # `evaluate` on `dm.lfw_dataset`.
                model.eval()
                same_acc, diff_acc, all_acc, auc, thresh = evaluate(
                    args,
                    dm.lfw_dataset,
                    model,
                    compute_embeddings_lfw,
                    args.test_batch_size,
                    verbose=False,
                    show_failed=args.show_failed,
                    load_embeddings=args.load_embeddings)

                log.info('Validation accuracy: {0:.4f}, {1:.4f}'.format(
                    same_acc, diff_acc))
                log.info('Validation accuracy mean: {0:.4f}'.format(all_acc))
                log.info('Validation AUC: {0:.4f}'.format(auc))
                log.info('Estimated threshold: {0:.4f}'.format(thresh))
                #roc_auc(model, '/home/maxim/data/lfw/pairsTest.txt', '/media/slow_drive/cropped_lfw', args, use_gpu)
        return
def main():
    """Train a model with ``TextLoss``, saving "quick save" checkpoints.

    Settings are read from the module-level ``args``.  The function builds
    the model, criterion, optimizer and a ``MultiStepLR`` scheduler,
    optionally loads size-compatible pretrained weights, optionally resumes
    model/optimizer state (including a mid-epoch ``step`` index) from a
    checkpoint, then runs the optional "fixbase" epochs and the main
    training loop.  ``train`` returns a loss value that decides when a
    checkpoint is written.
    """
    global args
    # Lowest loss seen so far; a checkpoint is saved when an epoch beats it.
    glob_min_loss = 1

    # args.source_names = ['total-text']
    # args.target_names = ['total-text']
    # args.height = 384
    # args.width = 384
    # args.negative_ratio = 3
    # args.optim = 'amsgrad'
    # args.lr = 1e-4
    # args.max_epoch = 100
    # args.stepsize = [40]
    # args.train_batch_size = 2
    # args.workers = 8
    # args.arch = 'se_resnext101_32x4d'
    # args.save_dir = 'log/se_resnext101_32x4d-final-text-net-total-text-no-randomcrop'
    # args.gpu_devices = '0'
    # args.resume = 'log/se_resnext101_32x4d-final-text-net-total-text-no-randomcrop/quick_save_checkpoint_ep6.pth.tar'
    # args.load_weights = 'log/se_resnext101_32x4d-final-text-net-total-text-lr-1e-5/quick_save_checkpoint_ep56.pth.tar'

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    criterion = TextLoss()
    optimizer = init_optimizer(model, **optimizer_kwargs(args))
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)

    # HACK: rebinds attributes on the shared `pickle` module so every later
    # unpickle in this process defaults to latin1 decoding.
    # NOTE(review): presumably to read checkpoints pickled under Python 2 -
    # confirm.  Unpickling executes code; only load trusted checkpoints.
    pickle.load = partial(pickle.load, encoding="latin1")
    pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")

    if args.load_weights and check_isfile(args.load_weights):
        # load pretrained weights but ignore layers that don't match in size
        checkpoint = torch.load(args.load_weights, pickle_module=pickle)
        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {
            k: v
            for k, v in pretrain_dict.items()
            if k in model_dict and model_dict[k].size() == v.size()
        }
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))
        # Drop the loaded checkpoint and release cached CUDA memory.
        del checkpoint
        torch.cuda.empty_cache()

    # Batch index at which the first resumed epoch continues (0 = from start).
    start_idx = 0
    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        if 'avg_loss' in checkpoint.keys():
            print('avg loss: ', checkpoint['avg_loss'])
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch'] + 1
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      checkpoint['rank1']))
        if 'optimizer' in checkpoint.keys():
            optimizer.load_state_dict(checkpoint['optimizer'])
            if use_gpu:
                # Move every tensor in the restored optimizer state to CUDA.
                for state in optimizer.state.values():
                    for k, v in state.items():
                        if isinstance(v, torch.Tensor):
                            state[k] = v.cuda()
        if 'step' in checkpoint.keys():
            # A stored `step` means the checkpoint was taken mid-epoch:
            # redo that epoch, starting from the batch after `step`.
            args.start_epoch -= 1
            start_idx = checkpoint['step'] + 1
            print('start_idx: ', start_idx)
        # Step the scheduler once per already-completed epoch.
        # NOTE(review): placed at resume level rather than under the `step`
        # branch - confirm against the original layout.
        for epoch in range(0, args.start_epoch):
            scheduler.step()
            continue
        del checkpoint
        torch.cuda.empty_cache()

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    print("Initializing image data manager")
    dm = DetectImageManager(model, use_gpu, **image_dataset_kwargs(args))
    trainloader = dm.return_dataloaders()

    start_time = time.time()
    # ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    print("=> Start training")

    if args.fixbase_epoch > 0:
        print(
            "Train {} for {} epochs while keeping other layers frozen".format(
                args.open_layers, args.fixbase_epoch))
        # Snapshot taken before the fixbase epochs and loaded back afterwards.
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch,
                  model,
                  criterion,
                  optimizer,
                  trainloader,
                  use_gpu,
                  fixbase=True)
            train_time += round(time.time() - start_train_time)

        print("Done. All layers are open to train for {} epochs".format(
            args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)

    for epoch in range(args.start_epoch, args.max_epoch):
        start_train_time = time.time()
        # The scheduler is stepped before the epoch's training here.
        scheduler.step()
        local_loss = train(epoch,
                           model,
                           criterion,
                           optimizer,
                           trainloader,
                           use_gpu,
                           start_idx=start_idx)
        # Only the first resumed epoch starts mid-way.
        start_idx = 0
        train_time += round(time.time() - start_train_time)

        # Parsed as (A and B and C) or D: periodic schedule, or final epoch.
        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("=> Test")

            # NOTE(review): checkpointing is nested under the evaluation
            # schedule in this layout - confirm against the original.
            if epoch % 10 == 0 or local_loss < glob_min_loss:
                if use_gpu:
                    # Unwrap nn.DataParallel before taking the state dict.
                    state_dict = model.module.state_dict()
                else:
                    state_dict = model.state_dict()
                optimizer_state_dict = optimizer.state_dict()
                if local_loss < glob_min_loss:
                    glob_min_loss = local_loss

                # 'rank1' is a constant placeholder; no ranking is computed.
                save_checkpoint(
                    {
                        'state_dict': state_dict,
                        'optimizer': optimizer_state_dict,
                        'rank1': 0,
                        'epoch': epoch,
                        'avg_loss': local_loss,
                    }, False,
                    osp.join(
                        args.save_dir, 'quick_save_checkpoint_ep' +
                        str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
def main():
    """Set up data, model and loss, then either evaluate or train.

    Settings come from the module-level ``args``.  ``criterion``,
    ``testloader_dict``, ``trainloader`` and ``use_gpu`` are published as
    module globals by this function.  In evaluate mode every target dataset
    is run through ``test`` and the function returns.  Otherwise
    ``train_base`` is run (skipped when a checkpoint was resumed), followed
    by ``train_RRI(model, 7)``.
    """
    global args, criterion, testloader_dict, trainloader, use_gpu

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices

    # The CPU flag overrides whatever CUDA reports.
    cuda_available = torch.cuda.is_available()
    use_gpu = False if args.use_cpu else cuda_available

    if args.evaluate:
        log_name = 'test.log'
    else:
        log_name = 'train.log'
    log_path = osp.join(args.save_dir, log_name)
    sys.stdout = Logger(log_path)
    print('==========\nArgs:{}\n=========='.format(args))

    if not use_gpu:
        warnings.warn('Currently using CPU, however, GPU is highly recommended')
    else:
        print('Currently using GPU {}'.format(args.gpu_devices))
        cudnn.benchmark = True

    print('Initializing image data manager')
    dataset_options = image_dataset_kwargs(args)
    dm = ImageDataManager(use_gpu, **dataset_options)
    trainloader, testloader_dict = dm.return_dataloaders()

    print('Initializing model: {}'.format(args.arch))
    model = models.init_model(
        name=args.arch,
        num_classes=dm.num_train_pids,
        loss={'xent'},
        pretrained=not args.no_pretrained,
        use_gpu=use_gpu,
    )
    print('Model size: {:.3f} M'.format(count_num_param(model)))

    if args.load_weights and check_isfile(args.load_weights):
        load_pretrained_weights(model, args.load_weights)

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    criterion = CrossEntropyLoss(
        num_classes=dm.num_train_pids,
        use_gpu=use_gpu,
        label_smooth=args.label_smooth,
    )

    # Remember whether a checkpoint was restored: base training is skipped
    # in that case.
    resumed = False
    if args.resume and check_isfile(args.resume):
        args.start_epoch = resume_from_checkpoint(args.resume, model, optimizer=None)
        resumed = True

    if args.evaluate:
        print('Evaluate only')
        for name in args.target_names:
            print('Evaluating {} ...'.format(name))
            queryloader = testloader_dict[name]['query']
            galleryloader = testloader_dict[name]['gallery']
            distmat = test(model, queryloader, galleryloader, use_gpu,
                           return_distmat=True)

            if not args.visualize_ranks:
                continue
            ranked_dir = osp.join(args.save_dir, 'ranked_results', name)
            visualize_ranked_results(
                distmat,
                dm.return_testdataset_by_name(name),
                save_dir=ranked_dir,
                topk=20
            )
        return

    time_start = time.time()
    print('=> Start training')

    if not resumed:
        train_base(model)
    train_RRI(model, 7)

    elapsed_seconds = round(time.time() - time_start)
    elapsed = str(datetime.timedelta(seconds=elapsed_seconds))
    print('Elapsed {}'.format(elapsed))
def main():
    """Train or evaluate a model on multi-scale inputs with TensorBoard logs.

    Settings are read from the module-level ``args``.  The function builds a
    data manager with ``scales=[224, 160]``, a model, a cross-entropy
    criterion, an optimizer and a ``MultiStepLR`` scheduler, optionally
    loading size-compatible pretrained weights and/or resuming a checkpoint.

    * With ``args.evaluate`` set, every target dataset is run through
      ``test`` and the function returns.
    * Otherwise the optional "fixbase" epochs run first, then the main loop.
      Loss and precision returned by ``train`` are written to a
      ``SummaryWriter``; rank-1 and mAP returned by ``test`` are logged and a
      checkpoint is saved on the evaluation schedule and after the final
      epoch.

    Raises:
        AssertionError: if ``args.train_batch_size`` is not a multiple of
            ``args.train_loss_batch_size``.
    """
    global args

    set_random_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing MultiScale data manager")
    # The message previously printed train_loss_batch_size twice; report the
    # two values that are actually being compared.
    assert args.train_batch_size % args.train_loss_batch_size == 0, \
        "'{}' is not divisible by {}".format(args.train_batch_size,
                                             args.train_loss_batch_size)
    dm = ImageDataManager(use_gpu, scales=[224, 160], **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              input_size=args.width,
                              loss={'xent'},
                              use_gpu=use_gpu)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    criterion = CrossEntropyLoss(num_classes=dm.num_train_pids,
                                 use_gpu=use_gpu,
                                 label_smooth=args.label_smooth)
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)

    if args.load_weights and check_isfile(args.load_weights):
        # load pretrained weights but ignore layers that don't match in size
        checkpoint = torch.load(args.load_weights)
        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {
            k: v
            for k, v in pretrain_dict.items()
            if k in model_dict and model_dict[k].size() == v.size()
        }
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        args.start_epoch = checkpoint['epoch'] + 1
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      checkpoint['rank1']))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")

        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            queryloader = testloader_dict[name]['query']
            galleryloader = testloader_dict[name]['gallery']
            test_set = dm.return_testdataset_by_name(name)
            rank1, mAP = test(model, test_set, name, queryloader, galleryloader,
                              use_gpu, visualize=args.visualize_ranks)
        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    maplogger = RankLogger(args.source_names, args.target_names)
    train_time = 0

    # Tensorboard
    writer = SummaryWriter(log_dir=osp.join('runs', args.save_dir))
    try:
        print("=> Start training")

        if args.fixbase_epoch > 0:
            print("Train {} for {} epochs while keeping other layers frozen".format(
                args.open_layers, args.fixbase_epoch))
            # Snapshot taken before the fixbase epochs, loaded back after.
            initial_optim_state = optimizer.state_dict()

            for epoch in range(args.fixbase_epoch):
                start_train_time = time.time()
                loss, prec1 = train(epoch, model, criterion, optimizer,
                                    trainloader, writer, use_gpu, fixbase=True)
                writer.add_scalar('train/loss', loss, epoch+1)
                writer.add_scalar('train/prec1', prec1, epoch+1)
                print('Epoch: [{:02d}] [Average Loss:] {:.4f}\t [Average Prec.:] {:.2%}'.format(epoch+1, loss, prec1))
                train_time += round(time.time() - start_train_time)

            print("Done. All layers are open to train for {} epochs".format(args.max_epoch))
            optimizer.load_state_dict(initial_optim_state)
            # Shift the epoch window so the main loop continues numbering
            # after the fixbase epochs.
            args.start_epoch += args.fixbase_epoch
            args.max_epoch += args.fixbase_epoch

        for epoch in range(args.start_epoch, args.max_epoch):
            start_train_time = time.time()
            loss, prec1 = train(epoch, model, criterion, optimizer,
                                trainloader, writer, use_gpu)
            writer.add_scalar('train/loss', loss, epoch+1)
            writer.add_scalar('train/prec1', prec1, epoch+1)
            print('Epoch: [{:02d}] [Average Loss:] {:.4f}\t [Average Prec.:] {:.2%}'.format(epoch+1, loss, prec1))
            train_time += round(time.time() - start_train_time)

            scheduler.step()

            # Same precedence as the unparenthesised original: the periodic
            # schedule OR "this is the final epoch".
            scheduled_eval = ((epoch + 1) > args.start_eval
                              and args.eval_freq > 0
                              and (epoch + 1) % args.eval_freq == 0)
            is_last_epoch = (epoch + 1) == args.max_epoch

            if scheduled_eval or is_last_epoch:
                print("=> Test")

                # Placeholder so the checkpoint below can still be written
                # when there are no target datasets (previously `rank1` was
                # unbound in that case).
                rank1 = 0
                for name in args.target_names:
                    print("Evaluating {} ...".format(name))
                    queryloader = testloader_dict[name]['query']
                    galleryloader = testloader_dict[name]['gallery']
                    test_set = dm.return_testdataset_by_name(name)

                    # Visualisation is forced on for the final epoch only.
                    if epoch+1 == args.max_epoch:
                        rank1, mAP = test(model, test_set, name, queryloader,
                                          galleryloader, use_gpu, visualize=True)
                    else:
                        rank1, mAP = test(model, test_set, name, queryloader,
                                          galleryloader, use_gpu)

                    writer.add_scalar(name + '_test/top1', rank1, epoch+1)
                    writer.add_scalar(name + '_test/mAP', mAP, epoch+1)
                    ranklogger.write(name, epoch + 1, rank1)
                    maplogger.write(name, epoch + 1, mAP)

                if use_gpu:
                    # Unwrap nn.DataParallel before taking the state dict.
                    state_dict = model.module.state_dict()
                else:
                    state_dict = model.state_dict()

                # The final epoch always reaches this branch, so this call
                # already writes the last checkpoint.  The former post-loop
                # "save last checkpoint" rewrote the identical file and
                # raised NameError when the loop body never ran, so it was
                # removed.
                save_checkpoint({
                    'state_dict': state_dict,
                    'rank1': rank1,
                    'epoch': epoch,
                }, False, osp.join(args.save_dir,
                                   'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))
    finally:
        # Flush and release the TensorBoard event file even if training fails.
        writer.close()

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))
    ranklogger.show_summary()
    maplogger.show_summary()
def main():
    """Build the model, restore weights and run ``extract_train_info``.

    Settings come from the module-level ``args``; ``dropout_optimizer`` is a
    module global passed to the model constructor.  Criterions, regularizer,
    optimizer and scheduler are constructed exactly as in the training entry
    point, although only the model and the training loader are handed to
    ``extract_train_info``.
    """
    global args, dropout_optimizer

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices

    # The CPU flag overrides whatever CUDA reports.
    cuda_available = torch.cuda.is_available()
    use_gpu = False if args.use_cpu else cuda_available

    if args.evaluate:
        log_name = 'log_test.txt'
    else:
        log_name = 'log_train.txt'
    # Both standard streams are redirected to the same logger object.
    sys.stderr = sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if not use_gpu:
        print("Currently using CPU, however, GPU is highly recommended")
    else:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)

    print("Initializing image data manager")
    dataset_options = image_dataset_kwargs(args)
    dm = ImageDataManager(use_gpu, **dataset_options)
    trainloader, testloader_dict = dm.return_dataloaders()

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(
        name=args.arch,
        num_classes=dm.num_train_pids,
        loss={'xent'},
        use_gpu=use_gpu,
        dropout_optimizer=dropout_optimizer,
    )
    print(model)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    # Constructed for parity with the training script; none of these objects
    # is used further down in this function.
    criterion, fix_criterion, switch_criterion, htri_param_controller = get_criterions(
        dm.num_train_pids, use_gpu, args)
    regularizer, reg_param_controller = get_regularizer(args.regularizer)
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=args.stepsize, gamma=args.gamma)

    if args.load_weights and check_isfile(args.load_weights):
        # Load pretrained weights, skipping entries whose name or size does
        # not match the model.  If the plain load fails, the error is printed
        # and the load is retried with 'cuda:0' remapped to the CPU.
        try:
            checkpoint = torch.load(args.load_weights)
        except Exception as e:
            print(e)
            checkpoint = torch.load(args.load_weights,
                                    map_location={'cuda:0': 'cpu'})

        model_dict = model.state_dict()
        compatible = {}
        for key, tensor in checkpoint['state_dict'].items():
            if key not in model_dict:
                continue
            if model_dict[key].size() != tensor.size():
                continue
            compatible[key] = tensor
        model_dict.update(compatible)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        # Overlay the checkpoint on the current state so keys missing from
        # the checkpoint keep their current values.
        merged_state = model.state_dict()
        merged_state.update(checkpoint['state_dict'])
        model.load_state_dict(merged_state)
        # NOTE: args.start_epoch is not taken from the checkpoint here.
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      checkpoint['rank1']))

    if use_gpu:
        # One device id per comma-separated entry in args.gpu_devices.
        gpu_count = len(args.gpu_devices.split(','))
        device_ids = list(range(gpu_count))
        model = nn.DataParallel(model, device_ids=device_ids).cuda()

    extract_train_info(model, trainloader)
def main():
    """Evaluate a model on every target dataset (test-only entry point).

    Settings come from the module-level ``args``.  The function builds the
    data loaders and the model, optionally loads size-compatible pretrained
    weights and/or a resume checkpoint, runs ``test`` on each target dataset
    and optionally saves ranked-result visualisations.

    Raises:
        RuntimeError: if ``args.evaluate`` is not set.
    """
    global args

    if not args.evaluate:
        raise RuntimeError('Test only!')

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices

    # The CPU flag overrides whatever CUDA reports.
    cuda_available = torch.cuda.is_available()
    use_gpu = False if args.use_cpu else cuda_available

    # args.evaluate is known to be truthy past the guard above, so the log
    # file is always the test log.
    log_name = 'log_test.txt'
    log_fn = osp.join(args.save_dir, log_name)
    # Both standard streams are redirected to the same logger object.
    sys.stderr = sys.stdout = Logger(log_fn)
    print("==========\nArgs:{}\n==========".format(args))

    if not use_gpu:
        print("Currently using CPU, however, GPU is highly recommended")
    else:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)

    print("Initializing image data manager")
    dataset_options = image_dataset_kwargs(args)
    dm = ImageDataManager(use_gpu, **dataset_options)
    trainloader, testloader_dict = dm.return_dataloaders()

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(
        name=args.arch,
        num_classes=dm.num_train_pids,
        loss={'xent'},
        use_gpu=use_gpu,
        args=vars(args),
    )
    print(model)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    if args.load_weights and check_isfile(args.load_weights):
        # Load pretrained weights, skipping entries whose name or size does
        # not match the model.  If the plain load fails, the error is printed
        # and the load is retried with 'cuda:0' remapped to the CPU.
        try:
            checkpoint = torch.load(args.load_weights)
        except Exception as e:
            print(e)
            checkpoint = torch.load(args.load_weights,
                                    map_location={'cuda:0': 'cpu'})

        model_dict = model.state_dict()
        compatible = {}
        for key, tensor in checkpoint['state_dict'].items():
            if key not in model_dict:
                continue
            if model_dict[key].size() != tensor.size():
                continue
            compatible[key] = tensor
        model_dict.update(compatible)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        # Overlay the checkpoint on the current state so keys missing from
        # the checkpoint keep their current values.
        merged_state = model.state_dict()
        merged_state.update(checkpoint['state_dict'])
        model.load_state_dict(merged_state)
        # NOTE: args.start_epoch is not taken from the checkpoint here.
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      checkpoint['rank1']))

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    print("Evaluate only")
    for name in args.target_names:
        print("Evaluating {} ...".format(name))
        distmat = test(model, testloader_dict[name], use_gpu,
                       return_distmat=True)

        if not args.visualize_ranks:
            continue
        ranked_dir = osp.join(args.save_dir, 'ranked_results', name)
        visualize_ranked_results(distmat,
                                 dm.return_testdataset_by_name(name),
                                 save_dir=ranked_dir,
                                 topk=20)
    return
def main():
    """Train or evaluate a model with switchable criterions and dropout control.

    Settings come from the module-level ``args``; ``dropout_optimizer`` is a
    module global that is passed to the model and updated every epoch.  The
    function builds the data loaders, model, criterions, regularizer,
    optimizer and ``MultiStepLR`` scheduler, optionally loads
    size-compatible pretrained weights and/or a resume checkpoint, and then:

    * with ``args.evaluate`` set, runs ``test`` on every target dataset
      (query/gallery loaders are passed as ``(normal, flipped)`` pairs) and
      returns;
    * otherwise runs the optional "fixbase" epochs and the main loop, saving
      a checkpoint after every epoch plus ``checkpoint_best.pth.tar``
      whenever the evaluated rank-1 improves.

    Environment variables read or written here: ``CUDA_VISIBLE_DEVICES``,
    ``test_first``, ``sa`` and ``NOFC``.
    """
    global args, dropout_optimizer

    torch.manual_seed(args.seed)
    if not args.use_avai_gpus:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False
    log_name = 'log_test.txt' if args.evaluate else 'log_train.txt'
    # Both standard streams are redirected to the same logger object.
    sys.stderr = sys.stdout = Logger(osp.join(args.save_dir, log_name))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu_devices))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU, however, GPU is highly recommended")

    print("Initializing image data manager")
    dm = ImageDataManager(use_gpu, **image_dataset_kwargs(args))
    trainloader, testloader_dict = dm.return_dataloaders()

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch,
                              num_classes=dm.num_train_pids,
                              loss={'xent'},
                              use_gpu=use_gpu,
                              dropout_optimizer=dropout_optimizer,
                              args=vars(args))
    print(model)
    print("Model size: {:.3f} M".format(count_num_param(model)))

    # criterion = WrappedCrossEntropyLoss(num_classes=dm.num_train_pids, use_gpu=use_gpu, label_smooth=args.label_smooth)
    # `fix_criterion` is used for the fixbase epochs, `criterion` for normal
    # epochs and `switch_criterion` once the switch condition below holds.
    criterion, fix_criterion, switch_criterion, htri_param_controller = get_criterions(
        dm.num_train_pids, use_gpu, args)
    regularizer, reg_param_controller = get_regularizer(args.regularizer)
    optimizer = init_optimizer(model.parameters(), **optimizer_kwargs(args))
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=args.stepsize,
                                         gamma=args.gamma)

    if args.load_weights and check_isfile(args.load_weights):
        # load pretrained weights but ignore layers that don't match in size
        try:
            checkpoint = torch.load(args.load_weights)
        except Exception as e:
            # If the plain load fails, print the error and retry with
            # 'cuda:0' remapped to the CPU.
            print(e)
            checkpoint = torch.load(args.load_weights,
                                    map_location={'cuda:0': 'cpu'})

        # dropout_optimizer.set_p(checkpoint.get('dropout_p', 0))
        # print(list(checkpoint.keys()), checkpoint['dropout_p'])

        pretrain_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        pretrain_dict = {
            k: v
            for k, v in pretrain_dict.items()
            if k in model_dict and model_dict[k].size() == v.size()
        }
        model_dict.update(pretrain_dict)
        model.load_state_dict(model_dict)
        print("Loaded pretrained weights from '{}'".format(args.load_weights))

    if args.resume and check_isfile(args.resume):
        checkpoint = torch.load(args.resume)
        # Overlay the checkpoint on the current state so keys missing from
        # the checkpoint keep their current values.
        state = model.state_dict()
        state.update(checkpoint['state_dict'])
        model.load_state_dict(state)
        # args.start_epoch = checkpoint['epoch'] + 1
        print("Loaded checkpoint from '{}'".format(args.resume))
        print("- start_epoch: {}\n- rank1: {}".format(args.start_epoch,
                                                      checkpoint['rank1']))

    if use_gpu:
        # One device id per comma-separated entry in args.gpu_devices.
        model = nn.DataParallel(
            model,
            device_ids=list(range(len(args.gpu_devices.split(','))))).cuda()

    if args.evaluate:
        print("Evaluate only")

        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            # Each loader argument is a (normal, flipped) pair.
            queryloader = testloader_dict[name]['query'], testloader_dict[
                name]['query_flip']
            galleryloader = testloader_dict[name]['gallery'], testloader_dict[
                name]['gallery_flip']
            distmat = test(model,
                           queryloader,
                           galleryloader,
                           use_gpu,
                           return_distmat=True)

            if args.visualize_ranks:
                visualize_ranked_results(distmat,
                                         dm.return_testdataset_by_name(name),
                                         save_dir=osp.join(
                                             args.save_dir, 'ranked_results',
                                             name),
                                         topk=20)
        return

    start_time = time.time()
    ranklogger = RankLogger(args.source_names, args.target_names)
    train_time = 0
    print("==> Start training")

    # Optional evaluation before any training, enabled by setting the
    # `test_first` environment variable; the result is not stored.
    if os.environ.get('test_first') is not None:
        for name in args.target_names:
            print("Evaluating {} ...".format(name))
            queryloader = testloader_dict[name]['query'], testloader_dict[
                name]['query_flip']
            galleryloader = testloader_dict[name]['gallery'], testloader_dict[
                name]['gallery_flip']
            rank1 = test(model, queryloader, galleryloader, use_gpu)

    if args.fixbase_epoch > 0:
        # The `sa` environment variable is blanked for the fixbase epochs and
        # restored afterwards (to '' if it was previously unset).
        # NOTE(review): `sa` is presumably read elsewhere in the project -
        # confirm what it toggles.
        oldenv = os.environ.get('sa', '')
        os.environ['sa'] = ''
        print(
            "Train {} for {} epochs while keeping other layers frozen".format(
                args.open_layers, args.fixbase_epoch))
        # Snapshot taken before the fixbase epochs and loaded back afterwards.
        initial_optim_state = optimizer.state_dict()

        for epoch in range(args.fixbase_epoch):
            start_train_time = time.time()
            train(epoch,
                  model,
                  fix_criterion,
                  regularizer,
                  optimizer,
                  trainloader,
                  use_gpu,
                  fixbase=True)
            train_time += round(time.time() - start_train_time)

        print("Done. All layers are open to train for {} epochs".format(
            args.max_epoch))
        optimizer.load_state_dict(initial_optim_state)
        os.environ['sa'] = oldenv

    # Best rank-1 seen so far; gates the "best" checkpoint.
    max_r1 = 0

    for epoch in range(args.start_epoch, args.max_epoch):
        dropout_optimizer.set_epoch(epoch)
        reg_param_controller.set_epoch(epoch)
        htri_param_controller.set_epoch(epoch)
        dropout_optimizer.set_training(True)
        start_train_time = time.time()
        print(epoch, args.switch_loss)
        print(criterion)

        # Switch criterion when switch_loss is positive and the epoch has
        # reached it, or when it is negative and the epoch is past
        # max_epoch + switch_loss.
        cond = args.switch_loss > 0 and epoch >= args.switch_loss
        cond = cond or (args.switch_loss < 0
                        and args.switch_loss + args.max_epoch < epoch)
        if cond:
            print('Switch!')
            criterion = switch_criterion

        train(epoch,
              model,
              criterion,
              regularizer,
              optimizer,
              trainloader,
              use_gpu,
              fixbase=False,
              switch_loss=cond)
        train_time += round(time.time() - start_train_time)

        if use_gpu:
            # Unwrap nn.DataParallel before taking the state dict.
            state_dict = model.module.state_dict()
        else:
            state_dict = model.state_dict()
        # Per-epoch checkpoint, written before evaluation, so 'rank1' is a
        # constant placeholder.
        save_checkpoint(
            {
                'state_dict': state_dict,
                'rank1': 0,
                'epoch': epoch,
                'dropout_p': dropout_optimizer.p,
            }, False,
            osp.join(args.save_dir,
                     'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

        scheduler.step()

        # Parsed as (A and B and C) or D: periodic schedule, or final epoch.
        if (epoch + 1) > args.start_eval and args.eval_freq > 0 and (
                epoch + 1) % args.eval_freq == 0 or (epoch +
                                                     1) == args.max_epoch:
            print("==> Test")
            dropout_optimizer.set_training(False)  # IMPORTANT!

            for name in args.target_names:
                print("Evaluating {} ...".format(name))
                queryloader = testloader_dict[name]['query'], testloader_dict[
                    name]['query_flip']
                galleryloader = testloader_dict[name][
                    'gallery'], testloader_dict[name]['gallery_flip']

                print('!!!!!!!!FC!!!!!!!!')
                # NOTE(review): `NOFC` is presumably read inside `test` or
                # the model - confirm.
                os.environ['NOFC'] = ''
                rank1 = test(model, queryloader, galleryloader, use_gpu)
                ranklogger.write(name, epoch + 1, rank1)

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()

            # `rank1` is the value for the last evaluated target dataset.
            # NOTE(review): with no target datasets and `test_first` unset,
            # `rank1` is never assigned before this comparison - confirm
            # target_names is always non-empty.
            if max_r1 < rank1:
                print('Save!', max_r1, rank1)
                save_checkpoint(
                    {
                        'state_dict': state_dict,
                        'rank1': rank1,
                        'epoch': epoch,
                        'dropout_p': dropout_optimizer.p,
                    }, False, osp.join(args.save_dir,
                                       'checkpoint_best.pth.tar'))

                max_r1 = rank1

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print(
        "Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".
        format(elapsed, train_time))
    ranklogger.show_summary()