def _calc_inter_distance(self, feature_map_dir, avg_feature_dict=None):
    """
    Measure the distance between every stored sample feature and the
    centre of its own class.

    Every file in ``feature_map_dir`` whose name contains 'features.pkl' is
    read; the part of the file name before the first underscore is used as
    the class id. The result is written to
    './results/temp/<prefix>_inter_class_distances.pkl' and returned,
    organized as:
    {
        'classid': {
            'xxx.png': distance,
            ...: ...
        },
        ...: ...
    }

    NOTE: the ``avg_feature_dict`` argument is not consulted. The class
    centres are always reloaded from
    './results/temp/<prefix>_true_avg_feature_for_each_class.pkl'.
    """
    started = time.time()
    tag = self.prefix.split('/')[0]
    centers = pickle_read(
        './results/temp/%s_true_avg_feature_for_each_class.pkl' % tag)

    per_class = {}
    pkl_names = [
        name for name in os.listdir(feature_map_dir)
        if 'features.pkl' in name
    ]
    for pkl_name in pkl_names:
        class_key = pkl_name.split('_')[0]
        samples = pickle_read(os.path.join(feature_map_dir, pkl_name))
        # The centre is looked up per sample so that an empty feature file
        # never touches ``centers`` at all.
        per_class[class_key] = {
            sample_name: self.feature_util.dist(centers[class_key],
                                                sample_feature)
            for sample_name, sample_feature in samples.items()
        }

    pickle_write('./results/temp/%s_inter_class_distances.pkl' % tag,
                 per_class)
    print('Time for _calc_inter_distance: %.1f s' % (time.time() - started))
    return per_class
def _calc_avg_feature_map(self, feature_map_dir, model_count=1):
    """
    Build one averaged feature tensor per class from the feature pickles
    found in ``feature_map_dir``.

    For each file whose name contains 'features.pkl', all stored features
    are summed and divided by the number of features (or by half that
    number when ``model_count`` is not 1). The class id is the part of the
    file name before the first underscore.

    The resulting ``{classid: torch.FloatTensor}`` dict is written to
    './results/temp/<dir-prefix>_avg_feature_for_each_class.pkl', where
    <dir-prefix> is the text before the first underscore of the last path
    component of ``feature_map_dir``, and is then returned.
    """
    started = time.time()
    pkl_names = [
        name for name in os.listdir(feature_map_dir)
        if 'features.pkl' in name
    ]

    class_means = {}
    for pkl_name in pkl_names:
        stored = pickle_read(os.path.join(feature_map_dir, pkl_name))
        tensors = list(stored.values())

        # Accumulate on the CPU in a numpy buffer shaped like the first
        # feature.
        running = np.zeros(shape=tensors[0].shape)
        for tensor in tensors:
            running += tensor.cpu().detach().numpy()

        if model_count == 1:
            denominator = len(tensors)
        else:
            denominator = len(tensors) / 2
        running /= denominator

        class_means[pkl_name.split('_')[0]] = torch.FloatTensor(running)

    dir_tag = feature_map_dir.split('/')[-1].split('_')[0]
    pickle_write(
        './results/temp/%s_avg_feature_for_each_class.pkl' % dir_tag,
        class_means)
    print('Time for _calc_avg_feature_map: %.1f s' % (time.time() - started))
    return class_means
def _calc_variance_each_class(self, inter_distance=None):
    """
    Reduce the per-sample distances of every class to a single number.

    The value stored for a class is the sum of its sample distances divided
    by the number of samples (i.e. the mean distance). The data will be
    saved to './results/temp/<prefix>_variance_each_class.pkl' and is
    organized as:
    {
        'classid': variance,
        ...: ...
    }

    When ``inter_distance`` is None it is loaded from
    './results/temp/<prefix>_inter_class_distances.pkl'.
    """
    started = time.time()
    tag = self.prefix.split('/')[0]

    if inter_distance is None:
        inter_distance = pickle_read(
            './results/temp/%s_inter_class_distances.pkl' % tag)

    per_class = {}
    for class_key, sample_distances in inter_distance.items():
        sample_total = len(sample_distances.keys())
        accumulated = .0
        for value in sample_distances.values():
            accumulated += value
        per_class[class_key] = accumulated / sample_total

    pickle_write('./results/temp/%s_variance_each_class.pkl' % tag, per_class)
    print('Time for _calc_variance_each_class: %.1f s' %
          (time.time() - started))
    return per_class
def _calc_exter_class_distance(self, avg_feature_dict):
    """
    Calculate the exter distance for each pair of class centre vectors and
    find the nearest other class of every class.

    The pairwise distances are saved to
    './results/temp/<prefix>_exter_class_distances.pkl' and organized as:
    {
        'id-id': distance,
        ...: ...
    }
    Each unordered pair appears once, keyed '<earlier id>-<later id>' in
    the iteration order of ``avg_feature_dict``.

    Returns:
        (exter_class_distance_dict, class_to_nearest_class) where
        ``class_to_nearest_class`` maps every class id to the id of its
        closest other class, or to None when there is no other class.
    """
    t1 = time.time()
    id_feature_ls = list(avg_feature_dict.items())

    exter_class_distance_dict = {}
    class_to_nearest_class = {classid: None for classid, _ in id_feature_ls}
    nearest_distance = {
        classid: float('inf') for classid, _ in id_feature_ls
    }

    for _i, (classid, feature) in enumerate(id_feature_ls):
        for _second_classid, _second_feature in id_feature_ls[_i + 1:]:
            _d = self.feature_util.dist(feature, _second_feature)
            exter_class_distance_dict[classid + '-' + _second_classid] = _d

            # Every pair is measured only once, so the result has to be
            # credited to BOTH classes; otherwise a class could only ever
            # be matched with classes that come after it in the list.
            # NOTE(review): this reuses one measurement for both directions,
            # i.e. assumes feature_util.dist is symmetric - confirm.
            if nearest_distance[classid] > _d:
                nearest_distance[classid] = _d
                class_to_nearest_class[classid] = _second_classid
            if nearest_distance[_second_classid] > _d:
                nearest_distance[_second_classid] = _d
                class_to_nearest_class[_second_classid] = classid

    pickle_write(
        './results/temp/%s_exter_class_distances.pkl' %
        self.prefix.split('/')[0], exter_class_distance_dict)
    print('Time for _calc_exter_class_distance: %.1f s' % (time.time() - t1))
    return exter_class_distance_dict, class_to_nearest_class
def main():
    """
    Swap the keys and values of the dict returned by ``get_mapping_dict()``
    and store the swapped dict in './mapping_reverse_dict.pkl'.
    """
    reversed_mapping = {
        value: key for key, value in get_mapping_dict().items()
    }
    pickle_write('./mapping_reverse_dict.pkl', reversed_mapping)
def write_feature_map(self, label, feature, file_name, feature_map_dir):
    """
    Store ``feature`` under ``file_name`` in the pickle that belongs to
    ``label``.

    The pickle lives at '<feature_map_dir>/<label>_features.pkl' and holds
    a ``{file_name: feature}`` dict. The directory is created when missing;
    an existing pickle is loaded, updated and rewritten.

    Returns:
        The ``feature`` argument, unchanged.
    """
    if not os.path.exists(feature_map_dir):
        os.makedirs(feature_map_dir)

    target = os.path.join(feature_map_dir, '%s_features.pkl' % label)
    # Start from what is already on disk, or from scratch for a new label.
    stored = pickle_read(target) if os.path.exists(target) else {}
    stored[file_name] = feature
    pickle_write(target, stored)
    return feature
def _write_feature_map(label, feature, file_name, feature_map_dir, weight=1.0):
    """
    Scale ``feature`` by ``weight`` and store the result under ``file_name``
    in the pickle that belongs to ``label``.

    The stored tensor is ``0 + feature * weight`` computed element-wise
    against a (1, 2048) CUDA buffer, so ``feature`` must broadcast against
    that shape. A CUDA device is required because the buffers are created
    with ``.cuda()``.
    # presumably ``feature`` already lives on the GPU - verify against caller

    The pickle lives at '<feature_map_dir>/<label>_features.pkl' and holds
    a ``{file_name: tensor}`` dict. The directory is created when missing;
    an existing pickle is loaded, updated and rewritten.

    Returns:
        The weighted feature tensor that was stored.
    """
    # Build the constant buffers directly instead of from 2048-element
    # Python lists.
    _zero = torch.zeros(1, 2048).cuda()
    _multiplier = torch.full((1, 2048), float(weight)).cuda()
    # The positional-``value`` form addcmul(input, 1, t1, t2) is deprecated;
    # the default value of 1 gives the same result.
    _zero = torch.addcmul(_zero, feature, _multiplier)

    if not os.path.exists(feature_map_dir):
        os.makedirs(feature_map_dir)
    feature_map_name = os.path.join(feature_map_dir,
                                    '%s_features.pkl' % label)
    if not os.path.exists(feature_map_name):
        obj = {file_name: _zero}
    else:
        obj = pickle_read(feature_map_name)
        obj[file_name] = _zero
    pickle_write(feature_map_name, obj)
    return _zero
def _calc_true_avg_feature(self, feature_pkls_dir):
    """
    Compute the plain arithmetic mean of the stored features of every class.

    Each file in ``feature_pkls_dir`` whose name contains 'features.pkl' is
    read, its features are summed and divided by their count, and the mean
    is kept as a ``torch.FloatTensor`` under the class id (the part of the
    file name before the first underscore).

    The ``{classid: mean}`` dict is written to
    './results/temp/<prefix>_true_avg_feature_for_each_class.pkl' and
    returned.
    """
    started = time.time()
    pkl_names = [
        name for name in os.listdir(feature_pkls_dir)
        if 'features.pkl' in name
    ]

    class_means = {}
    for pkl_name in pkl_names:
        stored = pickle_read(os.path.join(feature_pkls_dir, pkl_name))
        tensors = list(stored.values())

        # Accumulate on the CPU in a numpy buffer shaped like the first
        # feature.
        running = np.zeros(shape=tensors[0].shape)
        for tensor in tensors:
            running += tensor.cpu().detach().numpy()
        running /= len(tensors)

        class_means[pkl_name.split('_')[0]] = torch.FloatTensor(running)

    pickle_write(
        './results/temp/%s_true_avg_feature_for_each_class.pkl' %
        self.prefix.split('/')[0], class_means)
    print('Time for _calc_true_avg_feature: %.1f s' % (time.time() - started))
    return class_means
def get_accuracy_for_every_class(self, pkl_path, seen='none'):
    """
    Compute the per-class accuracy from the prediction results saved on disk.

    Reads './results/temp/<prefix>_predict_label_dict_<seen>.pkl' (keys of
    the form '<true id>-<predicted id>' mapping to lists of files) and
    './results/temp/<prefix>_class_count_dict_<seen>.pkl' (class id ->
    number of samples). The accuracy of a class is the number of files
    under its '<id>-<id>' key divided by its sample count, or 0 when that
    key is absent.

    Args:
        pkl_path: where the resulting list is pickled.
        seen: suffix selecting which pair of result files to read.

    Returns:
        A list of ``{'id', 'accuracy', 'count'}`` dicts ordered by ascending
        sample count.
    """
    prefix = self.prefix.split('/')[0]
    predict_label_dict = pickle_read(
        './results/temp/%s_predict_label_dict_%s.pkl' % (prefix, seen))
    class_count_dict = pickle_read(
        './results/temp/%s_class_count_dict_%s.pkl' % (prefix, seen))
    class_counts = sorted(class_count_dict.items(), key=lambda x: x[1])

    # The former load of './constants/mapping_dict.pkl' was dropped: its
    # result was never used, so it only added I/O and a failure point.
    accuracy_for_every_class = []
    for classid, classcount in class_counts:
        key = '%s-%s' % (classid, classid)
        if key in predict_label_dict:
            acc = len(predict_label_dict[key]) / classcount
        else:
            acc = 0
        accuracy_for_every_class.append({
            'id': classid,
            'accuracy': acc,
            'count': classcount
        })

    pickle_write(pkl_path, accuracy_for_every_class)
    return accuracy_for_every_class
def predict_pictures(self, test_pictures=None, seen='none'):
    """
    Run the first model over the test pictures in batches, classify every
    picture and record the results.

    For each picture the extracted feature is stored through
    ``self.feature_util.write_feature_map`` in a freshly emptied
    'results/temp/<prefix>_all_test_pkls' directory, and the predicted
    label comes from ``self.evaluate_single_file``.

    Two pickles are written:
      './results/temp/<prefix>_predict_label_dict_<seen>.pkl'
          '<true label>-<predicted label>' -> list of pictures
      './results/temp/<prefix>_class_count_dict_<seen>.pkl'
          true label -> number of pictures

    Args:
        test_pictures: file names inside ``self.test_dir``; all entries of
            that directory are used when None.
        seen: suffix used in the two output file names. It was previously
            referenced without being defined, which raised a NameError
            after the whole test pass had finished.

    Returns:
        The fraction of correctly classified pictures.
    """
    pkls_dir = os.path.join('results', 'temp',
                            self.prefix + '_all_test_pkls')
    if os.path.exists(pkls_dir):
        shutil.rmtree(pkls_dir)

    all_count, positive_count = 0, 0
    predict_dict, class_count_dict = {}, {}

    if test_pictures is None:
        test_pictures = os.listdir(self.test_dir)
    dataset = ImageDataset(
        [os.path.join(self.test_dir, x) for x in test_pictures],
        transform=TestTransform(self.input_w, self.input_h))
    test_loader = DataLoader(dataset,
                             batch_size=64,
                             num_workers=2,
                             pin_memory=True)

    for f_ls, l_ls, p_ls in test_loader:
        f_ls = self.models[0](torch.Tensor(f_ls).cuda())
        for f, label, p in zip(f_ls, l_ls, p_ls):
            self.feature_util.write_feature_map(label, f, p, pkls_dir,
                                                weight=1.0)
            pred_l, min_d = self.evaluate_single_file(f)

            pred_k = label + '-' + pred_l
            predict_dict.setdefault(pred_k, []).append(p)
            class_count_dict[label] = class_count_dict.get(label, 0) + 1

            if label == pred_l:
                positive_count += 1
            all_count += 1
            if all_count % 1000 == 0:
                print('All:', all_count, ', Positive:', positive_count)

    print('test finished. all:', all_count, ', positive:', positive_count)
    # store the prediction of each picture
    pickle_write(
        './results/temp/%s_predict_label_dict_%s.pkl' % (self.prefix, seen),
        predict_dict)
    # store the count of each class
    pickle_write(
        './results/temp/%s_class_count_dict_%s.pkl' % (self.prefix, seen),
        class_count_dict)
    # The tiny epsilon keeps the division defined when nothing was tested.
    return positive_count / (all_count + 1e-12)
def train(self, balance_testset):
    """
    Training using hard sample + sample re-weighting(proposed by keke)

    Runs ``self.train_epochs`` epochs starting at ``self.start_epoch``.
    Each epoch: optionally adjusts the learning rate, optionally rebuilds
    the list of training pictures, refreshes the hard-sample statistics
    (first epoch and every epoch divisible by 5), trains once with
    ``train_using_metriclearning``, analyses the exter class overlap,
    evaluates on seen (and, if configured, unseen) classes, logs to
    '<save_dir>/readme.txt' and saves a checkpoint.

    Training stops early when ``self.enable_stop_machanism`` is set and the
    accuracy dropped 12 epochs in a row or failed to match the best
    accuracy 24 epochs in a row.

    Args:
        balance_testset: not referenced anywhere in this method.

    Returns:
        (max_acc, epoch): the best accuracy on seen classes and the last
        epoch index that was reached.
    """
    self._train_prepare()
    analyzer = Analyzer(sample_file_dir=self.sample_file_dir,
                        test_dir=self.train_root,
                        prefix=self.prefix,
                        WIDTH=self.w,
                        HEIGHT=self.h)
    # max_acc: best seen accuracy so far; last_acc: seen accuracy of the
    # previous epoch; drop_count: consecutive epochs with a lower accuracy
    # than the previous one; fail_max_count: consecutive epochs that did not
    # equal the best accuracy.
    max_acc, last_acc, drop_count, fail_max_count = .0, .0, 0, 0
    max_acc_unseen = .0
    # epoch -> overlap rate list, filled only for selected epochs
    overlap_dict = {}
    # pre-set so that ``epoch`` is defined after the loop even when the
    # range below is empty
    epoch = self.start_epoch
    for epoch in range(self.start_epoch,
                       self.start_epoch + self.train_epochs):
        s_time = time.time()
        if self.step_size > 0:
            self.optimizer = _adjust_learning_rate(self.optimizer, epoch)
        # remembered here and written back to self.margin at the end of the
        # epoch
        next_margin = self.margin
        # get a brand new training set for a new epoch
        if self.num_train_imgs > self.num_train_pids * 100:
            if epoch == self.start_epoch:
                true_exter_class_top = None
            elif epoch % 5 == 0 or epoch == (self.start_epoch + 1):
                # exter_class_top was produced at the end of the previous
                # iteration
                true_exter_class_top = exter_class_top
            else:
                # keep the value chosen in an earlier epoch
                pass
            if true_exter_class_top is not None:
                print('length of true_exter_class_top:',
                      len(true_exter_class_top), ', and:',
                      true_exter_class_top)
            train_pictures = get_training_set_list(
                self.train_root,
                train_limit=70,
                random_training_set=False,
                special_classes=true_exter_class_top)
        else:
            train_pictures = None
        # and then go through the training set, to get data needed for hard-sample
        if epoch % 5 == 0 or epoch == self.start_epoch:
            # in the other epochs the values from the last refresh are reused
            distance_dict, class_to_nearest_class = analyzer.analysis_for_hard_sample(
                self.model, test_pictures=train_pictures)
        train_using_metriclearning(
            self.model,
            self.optimizer,
            self.tri_criterion,
            epoch,
            self.train_root,
            train_pictures=train_pictures,
            batch_size=self.batch_size,
            distance_dict=distance_dict,
            class_to_nearest_class=class_to_nearest_class)
        exter_class_top, overlap_rate_dict_ls = analyzer.analysis_for_exter_class_overlap(
            model_path=None, model=self.model, WIDTH=self.w, HEIGHT=self.h)
        # the epoch time reported to log() excludes the evaluation below
        e_time = time.time()
        if epoch % 5 == 0 or epoch == 2:
            overlap_dict[epoch] = overlap_rate_dict_ls
        # true testing on seen classes
        acc = self.tester.evaluate_with_models(seen='seen')
        print(
            'Margin: {}, Epoch: {}, Acc: {:.3}%, Top overlap rate: {:.4} (on seen pictures)[Hard Sample + Sample Re-weighting]'
            .format(self.margin, epoch, acc * 100,
                    overlap_rate_dict_ls[0][1]))
        if self.test_unseen_root is not None:
            # true testing on unseen classes
            acc_unseen = self.tester.evaluate_with_models(seen='unseen')
            max_acc_unseen = max(max_acc_unseen, acc_unseen)
            note = 'update:%.2f, on unseen%s' % (
                self.update_conv_layers, ' - New Unseen Accuracy'
                if max_acc_unseen == acc_unseen else '')
            log(log_path=os.path.join(self.save_dir, 'readme.txt'),
                epoch=epoch,
                accuracy=acc_unseen,
                train_cls_count=self.num_train_pids,
                test_cls_count=self.num_test_unseen_pids,
                method='metric',
                note=note)
            print(
                'Margin: {}, Epoch: {}, Acc: {:.3}% (on unseen pictures)[Hard Sample + Sample Re-weighting]'
                .format(self.margin, epoch, acc_unseen * 100))
        else:
            acc_unseen = -1
        max_acc = max(acc, max_acc)
        note = 'update:%.2f, on seen%s' % (self.update_conv_layers,
                                           ' - New Seen Accuracy'
                                           if max_acc == acc else '')
        log(log_path=os.path.join(self.save_dir, 'readme.txt'),
            epoch=epoch,
            accuracy=acc,
            train_cls_count=self.num_train_pids,
            test_cls_count=self.num_test_pids,
            method='metric',
            epoch_time=(e_time - s_time),
            note=note)
        # update the two early-stop counters
        if epoch == self.start_epoch:
            last_acc = acc
        else:
            if acc < last_acc:
                drop_count += 1
            else:
                drop_count = 0
            last_acc = acc
        if max_acc == acc:
            fail_max_count = 0
        else:
            fail_max_count += 1
        if 'inception3' == self.model_type:
            save_model_name = 'inception_v3_metric.tmpmodel.tar'
        else:
            save_model_name = 'resnet_metric.tmpmodel.tar'
        # with use_gpu the weights are taken from self.model.module
        state_dict = self.model.module.state_dict(
        ) if self.use_gpu else self.model.state_dict()
        # save model, and check if its the best model. save as the best model if positive
        save_checkpoint(
            {
                'state_dict': state_dict,
                'epoch': epoch,
            },
            is_best=acc == max_acc,
            save_dir=self.save_dir,
            filename=save_model_name,
            acc=acc,
        )
        # if the accuracy keep dropping, stop training
        if (drop_count == 12
                or fail_max_count == 24) and self.enable_stop_machanism:
            print(
                'Accuracy dropping for %d times or smaller the max for %d times, stop in epoch %d\n'
                % (drop_count, fail_max_count, epoch))
            break
        # if overlap_rate_dict_ls[0][1] < .1:
        #     print('Top exter class overlap rate reach a smaller value than threshold, stop in epoch %d\n' % epoch)
        #     break
        self.margin = next_margin
    # append the final summary to the same readme.txt that log() writes to
    with open(os.path.join(self.save_dir, 'readme.txt'), 'ab+') as f:
        c = '\r\n[Hard Sample + Sample Re-weighting] Training finished with: %d epoch, %.2f%% accuracy.' % (
            epoch, max_acc * 100)
        f.write(c.encode())
    self._clean_tmp_model(epoch)
    pickle_write(
        './results/temp/%s_v5_overlap_rate_dict.pkl' % self.prefix,
        overlap_dict)
    return max_acc, epoch
def predict_pictures(self, feature_map, index_list, seen, weight_ls, test_pictures=None):
    """
    Classify every test picture with a weighted sum of the features of all
    models in ``self.models`` and record the results.

    For each picture, the feature of model ``k`` is scaled by
    ``weight_ls[k]`` against a (1, 2048) CUDA buffer and the scaled
    features are summed. The combined feature is stored through
    ``self._write_feature_map`` in a freshly emptied
    'results/temp/<prefix>_all_test_pkls' directory and classified with
    ``self.evaluate_single_file``.

    Two pickles are written:
      './results/temp/<prefix>_predict_label_dict_<seen>.pkl'
          '<true label>-<predicted label>' -> list of file names
      './results/temp/<prefix>_class_count_dict_<seen>.pkl'
          true label -> number of pictures

    Args:
        feature_map: forwarded to ``self.evaluate_single_file``.
        index_list: not referenced in this method.
        seen: suffix used in the two output file names.
        weight_ls: one weight per entry of ``self.models``.
        test_pictures: file names inside ``self.test_dir``; all entries of
            that directory are used when None.

    Returns:
        The fraction of correctly classified pictures.
    """
    pkls_dir = os.path.join('results', 'temp',
                            self.prefix + '_all_test_pkls')
    if os.path.exists(pkls_dir):
        shutil.rmtree(pkls_dir)

    all_count, positive_count = 0, 0
    predict_dict, class_count_dict = {}, {}

    if test_pictures is None:
        test_pictures = os.listdir(self.test_dir)

    for i in test_pictures:
        file_path = os.path.join(self.test_dir, i)
        file_name = file_path.split('/')[-1]
        # The true label is the text after the last underscore of the path,
        # minus the final four characters (e.g. a '.png' style extension).
        cls_idx = re.split('_', file_path)[-1][:-4]
        all_count += 1

        feature_on_gpu = None
        for weight_index, model in enumerate(self.models):
            _f = self.get_feature(file_path, model)
            _zero = torch.zeros(1, 2048).cuda()
            _multiplier = torch.full(
                (1, 2048), float(weight_ls[weight_index])).cuda()
            # The positional-``value`` form addcmul(input, 1, t1, t2) is
            # deprecated; the default value of 1 gives the same result.
            _zero = torch.addcmul(_zero, _f, _multiplier)
            if type(_zero) is not torch.Tensor:
                # Report the type of the object that was actually checked.
                print('Expected torch.Tensor, got', type(_zero))
                exit(200)
            # The sum over models is accumulated on the CPU in numpy.
            if feature_on_gpu is None:
                feature_on_gpu = np.zeros(shape=_zero.shape)
            feature_on_gpu += _zero.cpu().detach().numpy()
        feature_on_gpu = torch.FloatTensor(feature_on_gpu).cuda()

        self._write_feature_map(cls_idx,
                                feature_on_gpu,
                                file_name,
                                pkls_dir,
                                weight=1.0)
        pred_label, min_distance = self.evaluate_single_file(
            feature_on_gpu, feature_map)

        pred_key = cls_idx + '-' + pred_label
        predict_dict.setdefault(pred_key, []).append(file_name)
        class_count_dict[cls_idx] = class_count_dict.get(cls_idx, 0) + 1

        if cls_idx == pred_label:  # compute the correct num of the class
            positive_count += 1
        if all_count % 1000 == 0:
            print('For now, all:', all_count, ', positive:', positive_count)

    print('all:', all_count, ', positive:', positive_count)
    # store the prediction of each picture
    pickle_write(
        './results/temp/%s_predict_label_dict_%s.pkl' % (self.prefix, seen),
        predict_dict)
    # store the count of each class
    pickle_write(
        './results/temp/%s_class_count_dict_%s.pkl' % (self.prefix, seen),
        class_count_dict)
    # The tiny epsilon keeps the division defined when nothing was tested.
    return positive_count / (all_count + 1e-12)