def find_repeat_img(data_pool, data_dir, repeat_dir):
    """Move files from ``data_dir`` whose base name already exists in ``data_pool``.

    Both directory trees are walked recursively. Files are compared by file
    name with the final extension stripped, so ``a.jpg`` in the pool matches
    ``a.png`` in ``data_dir``. Every matching file is moved into ``repeat_dir``
    with ``file_util.move_file`` and a ``total``/``repeat`` summary is printed.

    :param data_pool: root directory of the reference image pool
    :param data_dir: root directory of the images to check
    :param repeat_dir: directory receiving the repeated images (created if missing)
    :return: None
    """
    # splitext copes with names holding zero or several dots, which a two-way
    # unpack of ``split(".")`` rejects with ValueError. A set keeps the
    # membership test below O(1) and de-duplicates pool names for free.
    pool = {
        os.path.splitext(file)[0]
        for _root, _dirs, files in os.walk(data_pool)
        for file in files
    }

    if not os.path.exists(repeat_dir):
        os.makedirs(repeat_dir)

    total = 0
    repeat = 0
    for root, _dirs, files in os.walk(data_dir):
        for file in files:
            total += 1
            if os.path.splitext(file)[0] in pool:
                repeat += 1
                file_util.move_file(os.path.join(root, file), repeat_dir)
    print("total={0}, repeat={1}".format(total, repeat))
def verify_img(img_dir, error_img_save_dir):
    """Check every image under ``img_dir`` and move unreadable ones aside.

    The tree is walked recursively. Files rejected by ``img_util.is_img`` are
    ignored. Each remaining file is loaded with ``cv2.imread`` in grayscale
    mode; when the load yields ``None`` the path is printed and the file is
    moved into ``error_img_save_dir``. A summary line is printed at the end.

    :param img_dir: root directory of the images to check
    :param error_img_save_dir: directory receiving the broken images (created if missing)
    :return: None
    """
    if not os.path.exists(error_img_save_dir):
        os.makedirs(error_img_save_dir)

    checked = 0
    broken = 0
    for folder, _subdirs, names in os.walk(img_dir):
        for name in names:
            if img_util.is_img(name):
                checked += 1
                path = os.path.join(folder, name)
                if cv2.imread(path, cv2.IMREAD_GRAYSCALE) is not None:
                    # Loaded fine, nothing to do for this file.
                    continue
                print(path)
                file_util.move_file(path, error_img_save_dir)
                broken += 1
    print("共{0}张图片,有问题的图片{1}张".format(checked, broken))
def _move_img_with_json(root, img_name, dest_dir):
    """Move an image and its same-named ``.json`` companion into ``dest_dir``."""
    file_util.move_file(os.path.join(root, img_name), dest_dir)
    # NOTE(review): presumably file_basename strips the extension, making this
    # the image's annotation file - confirm against file_util.
    json_name = file_util.file_basename(img_name) + '.json'
    file_util.move_file(os.path.join(root, json_name), dest_dir)


def move_limit_width_height(img_dir, out_limit_dir, limit_width, limit_height,
                            ext_list=frozenset({'.JPG', '.jpg', '.png'})):
    """Move every image in ``img_dir`` that exceeds the size limits to ``out_limit_dir``.

    The tree is walked recursively and only files whose extension (as reported
    by ``file_util.file_extension``) is in ``ext_list`` are considered. An
    image whose width is greater than ``limit_width`` or whose height is
    greater than ``limit_height`` is moved together with its ``.json``
    companion, its dimensions are printed, and it is counted in ``limit``.
    Images that ``cv2.imread`` cannot load are moved as well, but they are not
    counted in ``limit``. A ``total``/``limit`` summary is printed at the end.

    :param img_dir: root directory of the images to check
    :param out_limit_dir: directory receiving the moved files (created if missing)
    :param limit_width: maximum allowed width in pixels
    :param limit_height: maximum allowed height in pixels
    :param ext_list: collection of file extensions to treat as images
    :return: None
    """
    if not os.path.exists(out_limit_dir):
        os.makedirs(out_limit_dir)

    total = 0
    limit = 0
    for root, _dirs, files in os.walk(img_dir):
        for f in files:
            path = os.path.join(root, f)
            if file_util.file_extension(path) not in ext_list:
                continue
            total += 1

            img = cv2.imread(path)
            if img is None:
                # Unreadable image: move it out of the way without counting it.
                _move_img_with_json(root, f, out_limit_dir)
                continue

            sp = img.shape
            height = sp[0]
            width = sp[1]
            channel = sp[2]
            if width > limit_width or height > limit_height:
                _move_img_with_json(root, f, out_limit_dir)
                print('width: %d height: %d channel: %d' % (width, height, channel))
                limit += 1
    print('total=' + str(total) + ',limit=' + str(limit))