def calc_predict_result(self, predict_dict_path, class_count_dict_path):
    """Build a class-vs-class prediction table (CSV) and return per-class accuracy."""
    predict_dict = pickle_read(predict_dict_path)
    class_count_dict = pickle_read(class_count_dict_path)
    data_dict = {
        '00ACCURACY': [
            self.id_name_dict[str(x)]
            for x in range(1, self.num_of_classes + 1)
        ]
    }
    class_acc = {}
    for i in range(1, self.num_of_classes + 1):
        column = '(%02d)%s' % (i, self.id_name_dict[str(i)])
        data_dict[column] = []
        for j in range(1, self.num_of_classes + 1):
            key = '%d-%d' % (j, i)  # 'true-predicted' class pair
            if key in predict_dict:
                acc = len(predict_dict[key]) / class_count_dict[str(j)]
                if i == j:
                    class_acc[str(i)] = acc
            else:
                acc = 0.0
            data_dict[column].append(acc)
    _save_csv = os.path.join(
        'model', self.prefix, 'visualize_folder',
        '%s_prediction_class_pairs.csv' % self.prefix.split('/')[0])
    write_csv(_save_csv, data_dict)
    return class_acc
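def _demo_predict_dict_layout():
    """Hedged, self-contained sketch (not part of the pipeline): shows the
    'true-pred' bookkeeping that calc_predict_result consumes. Keys are
    '<true_id>-<predicted_id>' and values list the matching files; all
    names and counts below are made-up toy values."""
    predict_dict = {
        '1-1': ['a.png', 'b.png'],  # class 1 correctly predicted twice
        '1-2': ['c.png'],           # class 1 confused with class 2 once
    }
    class_count_dict = {'1': 3}
    hits = len(predict_dict.get('1-1', []))
    print('class 1 accuracy: %.2f' % (hits / class_count_dict['1']))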
def _calc_inter_distance(self, feature_map_dir, avg_feature_dict=None):
    """
    Calculate, for every image, the distance to its class-average feature.
    The data will be saved and organized as:
    {
        'classid': {
            'xxx.png': distance,
            ...: ...
        },
        ...: ...
    }
    """
    t1 = time.time()
    distance_dict = {}
    if avg_feature_dict is None:  # fall back to the cached averages on disk
        avg_feature_dict = pickle_read(
            './results/temp/%s_true_avg_feature_for_each_class.pkl' %
            self.prefix.split('/')[0])
    for pkl in [
            x for x in os.listdir(feature_map_dir) if 'features.pkl' in x
    ]:
        classid = pkl.split('_')[0]
        distance_dict[classid] = {}
        for _filename, _feature in pickle_read(
                os.path.join(feature_map_dir, pkl)).items():
            distance_dict[classid][_filename] = self.feature_util.dist(
                avg_feature_dict[classid], _feature)
    pickle_write(
        './results/temp/%s_inter_class_distances.pkl' %
        self.prefix.split('/')[0], distance_dict)
    print('Time for _calc_inter_distance: %.1f s' % (time.time() - t1))
    return distance_dict
def _calc_variance_each_class(self, inter_distance=None):
    """
    Calculate the spread of each class, measured as the mean distance of
    its images to the class-average feature. The data will be saved and
    organized as:
    {
        'classid': variance,
        ...: ...
    }
    """
    t1 = time.time()
    variance_dict = {}
    if inter_distance is None:
        inter_distance = pickle_read(
            './results/temp/%s_inter_class_distances.pkl' %
            self.prefix.split('/')[0])
    for classid, distances in inter_distance.items():
        # Despite the name, this is the mean distance to the class centroid.
        variance_dict[classid] = sum(distances.values()) / len(distances)
    pickle_write(
        './results/temp/%s_variance_each_class.pkl' %
        self.prefix.split('/')[0], variance_dict)
    print('Time for _calc_variance_each_class: %.1f s' % (time.time() - t1))
    return variance_dict
def visualize_class_variance(self, variance_dict, class_acc=None):
    class_count_dict = pickle_read(
        './results/temp/%s_class_count_dict_seen.pkl' %
        self.prefix.split('/')[0])
    data_dict = {
        '0CLASS': [
            '%s' % self.id_name_dict[str(i)]
            for i in range(1, self.num_of_classes + 1)
        ],
        '1VARIANCE': [
            '%f' % variance_dict[str(i)]
            for i in range(1, self.num_of_classes + 1)
        ],
        '2IMAGES_COUNT': [
            '%d' % class_count_dict[str(i)]
            for i in range(1, self.num_of_classes + 1)
        ]
    }
    if class_acc is not None:
        data_dict['3ACCURACY'] = [
            '%f' % class_acc[str(i)]
            for i in range(1, self.num_of_classes + 1)
        ]
    _save_csv = os.path.join(
        'model', self.prefix, 'visualize_folder',
        '%s_class_variance_seen.csv' % self.prefix.split('/')[0])
    write_csv(_save_csv, data_dict)
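def _demo_class_variance_pipeline(viz, feature_map_dir):
    """Hedged end-to-end sketch of the intra-class spread pipeline above.
    `viz` is assumed to be an initialized instance of the surrounding
    class, and `feature_map_dir` to already hold the *_features.pkl caches."""
    viz._calc_true_avg_feature(feature_map_dir)             # class-average features
    distances = viz._calc_inter_distance(feature_map_dir)   # per-image distances
    variances = viz._calc_variance_each_class(distances)    # mean spread per class
    viz.visualize_class_variance(variances)                 # CSV summary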
def predict(self, model, feature_map, picture_path):
    model.eval()
    if isinstance(feature_map, str):
        feature_map = pickle_read(feature_map)
    feature = self.feature_util.get_feature(
        picture_path, model, TestTransform(self.input_w, self.input_h))
    pred_l, min_d = self.evaluate_single_file(feature, feature_map)
    return pred_l, min_d
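def _demo_single_prediction(tester, model):
    """Hedged usage sketch of predict() above; `tester` is assumed to be an
    instance of the surrounding class, and both paths are placeholders."""
    feature_map_path = './results/temp/demo_avg_feature_for_each_class.pkl'
    label, distance = tester.predict(model, feature_map_path,
                                     './dataset/test/10000_1.png')
    print('predicted class %s (closest-class distance %.4f)' %
          (label, distance))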
def prepare_data_for_one(
        pictures_pool,
        limit=100,
        ignore_limit=False,
        output_path=r'/home/ubuntu/Program/Dish_recognition/dataset/test'):
    # clean_old_train_test_data()
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)
    train_dict = pickle_read('./mapping_dict_for_train.pkl')
    train_dict_reverse = {v: k for k, v in train_dict.items()}
    start_index = 10000
    for label in os.listdir(pictures_pool):
        label_count = 0
        if label not in train_dict_reverse:
            count = len(os.listdir(os.path.join(pictures_pool, label)))
            print('skip:', label, ', has count:', count)
            continue
        ground_truth_label = train_dict_reverse[label]
        for image_path in os.listdir(os.path.join(pictures_pool, label)):
            final_name = str(start_index) + '_' + ground_truth_label + '.png'
            img = cv2.imread(os.path.join(pictures_pool, label, image_path))
            cv2.imwrite(os.path.join(output_path, final_name), img)
            start_index += 1
            label_count += 1
            if label_count == limit and not ignore_limit:
                break
    print(start_index - 10000, 'pictures prepared for specific one set.')
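def _demo_prepare_one_set():
    """Hedged example call of prepare_data_for_one; the pool directory is a
    placeholder and must contain one sub-directory per class label."""
    prepare_data_for_one('/data/pictures_pool', limit=100,
                         output_path='/data/one_set_test')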
def _calc_avg_feature_map(self, feature_map_dir, model_count=1):
    t1 = time.time()
    feature_pkls = [
        x for x in os.listdir(feature_map_dir) if 'features.pkl' in x
    ]
    avg_feature_dict = {}
    for pkl in feature_pkls:
        features = pickle_read(os.path.join(feature_map_dir, pkl))
        features = list(features.values())
        _avg_feature = np.zeros(shape=features[0].shape)
        for _feature in features:
            _avg_feature += _feature.cpu().detach().numpy()
        # With two models every image contributes two (weighted) features,
        # so the per-image count is len(features) / 2.
        divider = len(features) if model_count == 1 else (len(features) / 2)
        _avg_feature /= divider
        classid = pkl.split('_')[0]
        avg_feature_dict[classid] = torch.FloatTensor(_avg_feature)
    prefix = feature_map_dir.split('/')[-1].split('_')[0]
    pickle_write(
        './results/temp/%s_avg_feature_for_each_class.pkl' % prefix,
        avg_feature_dict)
    print('Time for _calc_avg_feature_map: %.1f s' % (time.time() - t1))
    return avg_feature_dict
def visualize_acc_for_every_class(self, pkl_path, acc_for_every_class=None):
    if acc_for_every_class is None:
        acc_for_every_class = pickle_read(pkl_path)
    data_dict = {'1CLASS': [], '2COUNT': [], '3ACCURACY': []}
    for obj in acc_for_every_class:
        data_dict['1CLASS'].append(obj['id'])
        data_dict['2COUNT'].append(obj['count'])
        data_dict['3ACCURACY'].append(obj['accuracy'] * 100)
    write_csv('%s.csv' % pkl_path, data_dict)
def get_feature_map_average(models,
                            sample_file_dir,
                            temp_prefix,
                            WIDTH,
                            HEIGHT,
                            weight_ls=None,
                            sample_num_each_cls=5,
                            ignore_limit=False):
    t1 = time.time()
    if not os.path.exists(sample_file_dir):
        print('You should prepare base samples first; '
              'calling prepare_base_picture_for_class may help.')
        return None
    feature_map_dir = os.path.join('results', 'temp',
                                   temp_prefix + '_feature_pkls')
    if os.path.exists(feature_map_dir):
        shutil.rmtree(feature_map_dir)
    os.makedirs(feature_map_dir)
    name_to_id_dict = pickle_read('./constants/mapping_reverse_dict.pkl')
    for i, model in enumerate(models):
        # Original code indexed weight_ls unconditionally and crashed when it
        # was None; fall back to equal weights in that case.
        weight = weight_ls[i] if weight_ls is not None else 1.0 / len(models)
        class_count_dict = {}
        for cls_name in os.listdir(sample_file_dir):
            ground_truth_label = name_to_id_dict.get(cls_name, cls_name)
            if ground_truth_label not in class_count_dict:
                class_count_dict[ground_truth_label] = 0
            # open the directory in order
            dir_full_path = os.path.join(sample_file_dir, cls_name)
            for file_name in os.listdir(dir_full_path):
                if ignore_limit or class_count_dict[
                        ground_truth_label] < sample_num_each_cls:
                    file_full_path = os.path.join(dir_full_path, file_name)
                    feature_on_gpu = get_feature(file_full_path, model,
                                                 WIDTH, HEIGHT)
                    _write_feature_map(label=ground_truth_label,
                                       feature=feature_on_gpu,
                                       file_name=file_name,
                                       feature_map_dir=feature_map_dir,
                                       weight=weight)
                    class_count_dict[ground_truth_label] += 1
    avg_feature_map = _calc_avg_feature_map(feature_map_dir, len(models))
    print('Time for get_feature_map_average: %.1f s' % (time.time() - t1))
    return avg_feature_map
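def _demo_weighted_feature_map(model_a, model_b):
    """Hedged sketch of ensembling two checkpoints via weighted average
    features; `model_a`/`model_b`, the sample directory, and the 224x224
    input size are placeholders, not values from the original code."""
    return get_feature_map_average([model_a, model_b],
                                   sample_file_dir='./dataset/sample',
                                   temp_prefix='demo',
                                   WIDTH=224,
                                   HEIGHT=224,
                                   weight_ls=[0.5, 0.5])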
def write_feature_map(self, label, feature, file_name, feature_map_dir):
    if not os.path.exists(feature_map_dir):
        os.makedirs(feature_map_dir)
    feature_map_name = os.path.join(feature_map_dir,
                                    '%s_features.pkl' % label)
    if not os.path.exists(feature_map_name):
        obj = {file_name: feature}
    else:
        obj = pickle_read(feature_map_name)
        obj[file_name] = feature
    pickle_write(feature_map_name, obj)
    return feature
def calc_top3_error(self, predict_dict_path, class_count_dict_path, seen,
                    epoch):
    """For every class below 80% accuracy, log its top-3 confusions."""
    predict_dict = pickle_read(predict_dict_path)
    class_count_dict = pickle_read(class_count_dict_path)
    id_name = pickle_read(
        '/home/ubuntu/Program/Dish_recognition/program/constants/mapping_dict.pkl'
    )
    for i in range(1, 95):  # class ids are 1..94
        key = '%d-%d' % (i, i)
        if key not in predict_dict:
            continue
        acc = len(predict_dict[key]) / class_count_dict[str(i)]
        if acc >= .8:
            continue
        # accuracy is low enough: record the top 3 confused classes
        with open(
                os.path.join(
                    'results', 'data_analyze',
                    '%s_top3_error.txt' % self.prefix.split('/')[0]),
                'ab+') as f:
            acc_dict = {}
            time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            c = '\r\n\r\n%s (%s)\r\nEpoch[%d] Top 3 Error of Class: %d (Dish: %s)\r\n' % (
                time_str, seen, epoch, i, id_name[str(i)])
            f.write(c.encode())
            for j in range(1, 95):
                if i == j:
                    continue
                key = '%d-%d' % (i, j)
                if key in predict_dict:
                    acc_dict[str(j)] = len(
                        predict_dict[key]) / class_count_dict[str(i)] * 100
            top3 = sorted(acc_dict.items(), key=lambda x: x[1],
                          reverse=True)[:3]
            for cls_idx, acc in top3:
                c = '\tClass: %s (Dish: %s) : %.2f\r\n' % (
                    cls_idx, id_name[cls_idx], acc)
                f.write(c.encode())
def get_accuracy_for_every_class(self, pkl_path, seen='none'):
    predict_label_dict = pickle_read(
        './results/temp/%s_predict_label_dict_%s.pkl' %
        (self.prefix.split('/')[0], seen))
    class_count_dict = pickle_read(
        './results/temp/%s_class_count_dict_%s.pkl' %
        (self.prefix.split('/')[0], seen))
    class_count_dict = sorted(class_count_dict.items(), key=lambda x: x[1])
    accuracy_for_every_class = []
    for classid, classcount in class_count_dict:
        key = '%s-%s' % (classid, classid)
        acc = (len(predict_label_dict[key]) / classcount
               if key in predict_label_dict else 0)
        accuracy_for_every_class.append({
            'id': classid,
            'accuracy': acc,
            'count': classcount
        })
    pickle_write(pkl_path, accuracy_for_every_class)
    return accuracy_for_every_class
def _write_feature_map(label, feature, file_name, feature_map_dir,
                       weight=1.0):
    # Scale the feature by this model's ensemble weight before caching.
    # Equivalent to the original addcmul-with-zeros trick (a deprecated
    # torch.addcmul signature), without assuming a fixed 2048-d feature.
    _weighted = feature * weight
    if not os.path.exists(feature_map_dir):
        os.makedirs(feature_map_dir)
    feature_map_name = os.path.join(feature_map_dir,
                                    '%s_features.pkl' % label)
    if not os.path.exists(feature_map_name):
        obj = {file_name: _weighted}
    else:
        obj = pickle_read(feature_map_name)
        obj[file_name] = _weighted
    pickle_write(feature_map_name, obj)
    return _weighted
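def _demo_why_half_divider():
    """Hedged toy illustration of why _calc_avg_feature_map divides by
    len(features) / 2 when two models write into the same cache: each image
    then contributes two weighted features whose sum is the ensemble value.
    The tensors and weights below are made-up."""
    import torch
    f1 = torch.full((1, 4), 2.0)    # feature of one image from model A
    f2 = torch.full((1, 4), 4.0)    # feature of the same image from model B
    entries = [f1 * 0.5, f2 * 0.5]  # what _write_feature_map caches
    avg = sum(entries) / (len(entries) / 2)  # == 0.5 * f1 + 0.5 * f2
    print(avg)  # tensor([[3., 3., 3., 3.]])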
def predict(model, feature_map, picture_path):
    """
    Get the prediction for one picture.

    arguments:
        model: the model to use
        feature_map: the feature map itself, or a path to load it from;
            a feature map is needed for prediction
        picture_path: the path of the picture to classify

    return:
        the predicted label and the distance to the closest class feature
    """
    model.eval()
    if isinstance(feature_map, str):
        feature_map = pickle_read(feature_map)
    # NOTE: the original called get_feature(picture_path) alone, which does
    # not match get_feature's signature elsewhere in this repo; module-level
    # WIDTH/HEIGHT constants are assumed here.
    feature = get_feature(picture_path, model, WIDTH, HEIGHT)
    pred_label, min_distance = evaluate_single_file(feature, feature_map)
    return pred_label, min_distance
def __init__(self,
             model,
             sample_file_dir,
             test_dir,
             prefix,
             input_w,
             input_h,
             sample_num_each_cls=5,
             tablewares_mapping_path=None):
    # The original signature declared `model` twice (a SyntaxError); the
    # duplicate keyword parameter has been removed.
    self.model = model
    self.sample_file_dir = sample_file_dir
    self.test_dir = test_dir
    self.prefix = prefix
    self.input_w = input_w
    self.input_h = input_h
    self.sample_num_each_cls = sample_num_each_cls
    self.mapping = None
    if tablewares_mapping_path is not None:
        self.mapping = pickle_read(tablewares_mapping_path)
    self.feature_util = FeatureUtil(input_w, input_h)
    self.feature_map = None
def __init__(self,
             sample_file_dir,
             test_dir,
             num_of_classes,
             prefix,
             WIDTH=None,
             HEIGHT=None):
    self.sample_file_dir = sample_file_dir
    self.test_dir = test_dir
    self.num_of_classes = num_of_classes
    self.prefix = prefix
    pkl_path = ('./constants/o_id_to_name.pkl'
                if self.num_of_classes > 50 else
                './constants/n_id_to_name.pkl')
    self.id_name_dict = pickle_read(pkl_path)
    visualize_folder = os.path.join('model', self.prefix,
                                    'visualize_folder')
    if not os.path.exists(visualize_folder):
        os.makedirs(visualize_folder)
    if WIDTH is not None:
        self.WIDTH = WIDTH
    if HEIGHT is not None:
        self.HEIGHT = HEIGHT
def _calc_true_avg_feature(self, feature_pkls_dir):
    t1 = time.time()
    feature_pkls = [
        x for x in os.listdir(feature_pkls_dir) if 'features.pkl' in x
    ]
    avg_feature_dict = {}
    for pkl in feature_pkls:
        features = pickle_read(os.path.join(feature_pkls_dir, pkl))
        features = list(features.values())
        _avg_feature = np.zeros(shape=features[0].shape)
        for _feature in features:
            _avg_feature += _feature.cpu().detach().numpy()
        _avg_feature /= len(features)
        classid = pkl.split('_')[0]
        avg_feature_dict[classid] = torch.FloatTensor(_avg_feature)
    pickle_write(
        './results/temp/%s_true_avg_feature_for_each_class.pkl' %
        self.prefix.split('/')[0], avg_feature_dict)
    print('Time for _calc_true_avg_feature: %.1f s' % (time.time() - t1))
    return avg_feature_dict
def get_mapping_dict():
    return pickle_read('mapping_dict.pkl')
if __name__ == '__main__':
    # training_for_94_vs_54()
    # exit(1000)
    name_to_id_dict = pickle_read('../constants/mapping_reverse_dict.pkl')
    train_classes = [
        '洋葱', '日本豆腐', '粗海带炒肉', '炒饭', '煎饺', '肠粉', '小青椒炒蛋',
        '土豆鸡肉', '腐竹炒肉', '苦瓜炒肉', '大头菜', '炒腐皮', '蟹柳', '苦瓜包肉',
def get_mapping_list():
    return pickle_read('mapping_list.pkl')
def prepare_dataset_different_train_test(dir_list=None,
                                         train_limit=300,
                                         test_limit=100,
                                         train_classes=None,
                                         unseen_test_classes=None):
    # This function is only used to build train/test sets with different
    # data distributions.
    if dir_list is None:
        dir_list = os.listdir(origin_data_root)
    train_index, test_index, unseen_test_index = 10000, 10000, 10000
    pic_sum_limit = 25  # only classes with more than 25 pictures are kept
    test_num_dict, train_num_dict = {}, {}
    clean_old_train_test_data()
    # First, prepare a training set with a 2-level layout and a test set
    # with a 1-level layout.
    name_to_id_dict = pickle_read('../constants/mapping_reverse_dict.pkl')
    for index, cls_name in enumerate(dir_list):
        if '.zip' in cls_name:
            continue
        dir_full_path = os.path.join(origin_data_root, cls_name)
        pic_list = os.listdir(dir_full_path)
        random.shuffle(pic_list)
        feature_list = pic_list[:5]
        copy_files(src_dir=dir_full_path,
                   des_dir=output_sample_path,
                   pic_list=feature_list,
                   cls_idx=name_to_id_dict[cls_name])
        train_and_test_list = pic_list[5:]
        if (len(pic_list) < pic_sum_limit and train_classes is None) or (
                train_classes is not None
                and cls_name not in train_classes):
            print(cls_name, 'not included for training.')
            if (unseen_test_classes is not None
                    and cls_name in unseen_test_classes):
                for pic in train_and_test_list:
                    src_path = os.path.join(dir_full_path, pic)
                    output_name = (str(unseen_test_index) + '_' +
                                   name_to_id_dict[cls_name] + '.png')
                    img = cv2.imread(src_path)
                    cv2.imwrite(
                        os.path.join(output_test_unseen_path, output_name),
                        img)
                    unseen_test_index += 1
            continue
        if len(train_and_test_list) <= (train_limit + test_limit):
            mid = int(len(train_and_test_list) / 2)
            # train_list = train_and_test_list[:mid]
            test_list = train_and_test_list[mid:]
        else:
            test_list = train_and_test_list[:test_limit]
            # train_list = train_and_test_list[test_limit:]
        # The training images come from the distorted copies of this class.
        train_final_output_dir = output_train_path
        dir_path = os.path.join(distortion_train_path, cls_name)
        train_list = os.listdir(dir_path)
        for pic in train_list:
            src_path = os.path.join(dir_path, pic)
            output_name = (str(train_index) + '_' +
                           name_to_id_dict[cls_name] + '.png')
            img = cv2.imread(src_path)
            cv2.imwrite(os.path.join(train_final_output_dir, output_name),
                        img)
            train_index += 1
        for pic in test_list:
            src_path = os.path.join(dir_full_path, pic)
            output_name = (str(test_index) + '_' +
                           name_to_id_dict[cls_name] + '.png')
            img = cv2.imread(src_path)
            cv2.imwrite(os.path.join(output_test_path, output_name), img)
            test_index += 1
    print('training & test set prepared.')
    # Outputs: train set path, test set path, base sample path.
    return
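def _demo_prepare_split():
    """Hedged example invocation of prepare_dataset_different_train_test;
    the class-name lists are short placeholders taken from the
    train_classes list in the __main__ block above."""
    prepare_dataset_different_train_test(train_limit=300,
                                         test_limit=100,
                                         train_classes=['炒饭', '煎饺'],
                                         unseen_test_classes=['洋葱'])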