def get_data(self, cate='train'):
    path_label = self.train_path_label.copy() if cate == 'train' else self.test_path_label.copy()
    # Pre-generate random numbers in blocks of 5000; they drive the data augmentation.
    random_ranges = np.random.randint(0, 1001, size=[5000, 11]) / 1000.
    while not self.should_stop:
        random.shuffle(path_label)  # shuffle the file order
        for ind, _path_label in enumerate(path_label):
            if self.should_stop:
                break
            if cate == 'train':
                # Read inside the loop so augmentation can be switched off when
                # evaluating over the whole dataset at the end of training.
                not_aug = self.not_aug
                if ind % 5000 == 0:
                    random_ranges = np.random.randint(0, 1001, size=[5000, 11]) / 1000.
            else:
                not_aug = True
            try:
                # Some images may have been deleted, but the remaining ones
                # have already been verified as readable.
                file_path, label = _path_label
                img = cv2_imread(file_path)
                img = get_img_augmentation(img.astype(np.uint8),
                                           random_ranges[ind % 5000],
                                           not_aug,
                                           **arcface_cfg.img_aug_params)
                yield img, int(label)
            except Exception:
                continue
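# A minimal usage sketch (not part of the original code): the generator above can be
# wrapped with tf.data.Dataset.from_generator for batched training. `sample_manager`
# and `batch_size` are hypothetical names, and the output dtypes/shapes are assumptions
# (square images of arcface_cfg.cnn_shape pixels with 3 channels).
# dataset = tf.data.Dataset.from_generator(
#     lambda: sample_manager.get_data('train'),
#     output_types=(tf.float32, tf.int32),
#     output_shapes=((arcface_cfg.cnn_shape, arcface_cfg.cnn_shape, 3), ()))
# dataset = dataset.batch(batch_size).prefetch(1)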
def check_dir(self, dir):
    print(f'START: check dir {dir}.')
    dir_names = [dir + n + '/' for n in os.listdir(dir)
                 if 'face' in n and os.path.isdir(dir + n)]
    for dir_name in dir_names:
        print(f'START: check dir {dir_name}.')
        file_paths = [dir_name + n for n in os.listdir(dir_name)
                      if os.path.isfile(dir_name + n)]
        for file_path in file_paths:
            try:
                img = cv2_imread(file_path)
                if img is None:
                    print(f'DELETE: read None from {file_path}.')
                    os.remove(file_path)
                    continue
                img = get_img_augmentation(img,
                                           np.random.randint(0, 101, size=11) / 100.,
                                           **arcface_cfg.img_aug_params)
            except Exception:
                print(f'DELETE: cannot augment {file_path}.')
                os.remove(file_path)
        print(f'FINISH: check dir {dir_name}.')
    print(f'FINISH: check dir {dir}.')
def check_files(self, save_dir):
    print('START: check total samples.')
    for path_label in (self.train_path_label, self.test_path_label):
        for file_path, label in path_label:
            try:
                img = cv2_imread(file_path)
                if img is None:
                    print(f'DELETE: read None from {file_path}.')
                    os.remove(file_path)
                    continue
                img = get_img_augmentation(img,
                                           np.random.randint(0, 101, size=11) / 100.,
                                           **arcface_cfg.img_aug_params)
            except Exception:
                print(f'COPY: cannot augment {file_path}.')
                # shutil.copy accepts a directory as destination; copyfile does not.
                shutil.copy(file_path, save_dir)
    print('FINISH: check total samples.')
def get_imgs_for_similarity_from_datasets(self, num=6, same=3, datasets='total'):
    # Pick images for the similarity computation: several from the same person,
    # the rest chosen at random.
    path_labels = self.get_paths_for_similarity(
        num=num, same=same, dict_label_path=self.dict_label_path[datasets])
    imgs, labels = [], []
    for path, label in path_labels:
        try:
            img = cv2_imread(path)
            img = cv2.resize(img, (arcface_cfg.cnn_shape, arcface_cfg.cnn_shape),
                             interpolation=cv2.INTER_AREA)
            imgs.append(img)
            labels.append(label)
        except Exception:
            continue
    return np.array(imgs), labels
def get_imgs_by_class(self, cls, num=None, datasets='total'):
    """
    Get `num` images of class `cls` from the given datasets.
    :param cls: class label
    :param num: None (default) means all images of the class
    :param datasets: which split to read from
    :return: array of images and the corresponding label list
    """
    path_dict = self.dict_label_path[datasets]  # {label: [path, ...], ...}
    paths = path_dict[str(cls)]
    if num is not None:
        paths = paths[:num]
    imgs = []
    for path in paths:
        try:
            img = cv2_imread(path)
            img = cv2.resize(img, (arcface_cfg.cnn_shape, arcface_cfg.cnn_shape),
                             interpolation=cv2.INTER_AREA)
            imgs.append(img)
        except Exception:
            continue
    return np.array(imgs), [cls] * len(imgs)
def com_cls(self, cls=0, datasets='train'):
    # If a class contains too many images, it must be processed in batches. In that case
    # the within-class similarity cannot be taken from ts.sim_matrix directly: all
    # embeddings (ids) have to be computed first and the similarities computed afterwards.
    app = self.app
    paths = app.sa.dict_label_path[datasets][str(cls)]
    num_pics = len(paths)
    imgs = []
    for path in paths:
        try:
            img = cv2_imread(path)
            img = cv2.resize(img, (cfg.cnn_shape, cfg.cnn_shape),
                             interpolation=cv2.INTER_AREA)
            imgs.append(img)
        except Exception:
            continue
    labels = np.array([cls] * len(imgs))
    imgs = np.array(imgs)
    feed_dict = {app.ts.keep_prob: 1,
                 app.ts.training: False,
                 app.ts.inputs: imgs,
                 app.ts.labels: labels}
    run_list = [app.ts.acc, app.ts.thetas_angle, app.ts.cosines, app.ts.sim_matrix]
    acc, angles, cosines, sim_matrix = app.sess.run(run_list, feed_dict)
    # Pairwise similarities between the images of this batch.
    sims = get_triu(sim_matrix)
    # self._com_cls_plot(train_angles, cosines, labels_list, sim_matrix)
    return num_pics, acc, angles, labels, sims
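# `get_triu` is used above but not defined in this section. A minimal sketch, assuming it
# simply collects the strict upper triangle of the similarity matrix (each unordered pair
# of images exactly once, excluding self-similarities):
# def get_triu(matrix):
#     matrix = np.asarray(matrix)
#     return matrix[np.triu_indices_from(matrix, k=1)]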