def gen_11(tmp_dir):
    """Build an 11-char training sample by stitching a 4-char, a 6-char and
    a 1-char crop from *tmp_dir* along their shared axis.

    :param tmp_dir: directory containing candidate crop images
    :return: (stitched image, concatenated label), or (None, None) when any
        required crop is missing or the orientations disagree
    """
    img_paths = get_all_file_from_dir(tmp_dir)
    alph, alph_label = get_num_path(img_paths, 4)
    six, six_label = get_num_path(img_paths, 6)
    one, one_label = get_num_path(img_paths, 1)
    # Bug fix: the original guarded only alph/six, so a missing 1-char crop
    # crashed in cv2.imread(one) / one.shape below.
    if alph is None or six is None or one is None:
        return None, None
    alph = cv2.imread(alph)
    six = cv2.imread(six)
    one = cv2.imread(one)
    label = alph_label + six_label + one_label
    a_h, a_w = alph.shape[:2]
    s_h, s_w = six.shape[:2]
    o_h, o_w = one.shape[:2]
    if a_h / a_w > 1 and s_h / s_w > 1:
        # Vertical strips: normalise width to 32, stack top-to-bottom.
        alph = cv2.resize(alph, (32, int(32 / a_w * a_h)))
        six = cv2.resize(six, (32, int(32 / s_w * s_h)))
        one = cv2.resize(one, (32, int(32 / o_w * o_h)))
        return np.concatenate((alph, six, one), axis=0), label
    if a_h / a_w < 1 and s_h / s_w < 1:
        # Horizontal strips: normalise height to 32, stack left-to-right.
        alph = cv2.resize(alph, (int(32 / a_h * a_w), 32))
        six = cv2.resize(six, (int(32 / s_h * s_w), 32))
        one = cv2.resize(one, (int(32 / o_h * o_w), 32))
        return np.concatenate((alph, six, one), axis=1), label
    return None, None
def load_all_data():
    """Load every training image under IMAGE_PATH, grouped by label length.

    The label is the file-name prefix before the first '-', with '#'
    markers stripped.  Each image is reshaped to a 1-image batch
    (1, h, w, c).

    :return: dict with 'one'/'four'/'six' label->sample-list maps plus a
        'zero' list collecting every other label length
    """
    files_paths = get_all_file_from_dir(IMAGE_PATH)
    print('training images size:' + str(len(files_paths)))
    print('start load training data')
    data = {'one': {}, 'four': {}, 'six': {}, 'zero': []}
    # label length -> bucket name; anything else falls through to 'zero'
    buckets = {1: 'one', 4: 'four', 6: 'six'}
    for p in files_paths:
        img = cv2.imread(p)
        h, w, c = img.shape
        img = img.reshape((1, h, w, c))
        _, image_name = os.path.split(p)
        label = image_name.replace('#', '').split('-')[0]
        sample = {'label': label, 'img': img}
        bucket = buckets.get(len(label))
        if bucket is None:
            data['zero'].append(sample)
        else:
            # setdefault replaces the repeated "not in keys()" dance
            data[bucket].setdefault(label, []).append(sample)
    print('loading data finish')
    return data
def load_map(path):
    """Map file base names (name without extension) to their full paths.

    :param path: directory to scan
    :return: dict of base name -> path; later duplicates overwrite earlier
    """
    # Renamed local: 'map' shadowed the builtin.
    mapping = {}
    for p in get_all_file_from_dir(path):
        _, name = os.path.split(p)
        mapping[name.split('.')[0]] = p
    return mapping
def get_all_orig_char_path():
    """Return the paths of all original character images.

    :return: list of file paths under GENERATE_DIE_PATH/orig
    """
    return get_all_file_from_dir(GENERATE_DIE_PATH + 'orig')
def get_unrelated_image():
    """Find images that have no matching annotation file.

    An image counts as related when its base name occurs inside any
    annotation path.  NOTE(review): `a.find(name) > 0` (kept as-is) would
    miss a match at index 0; annotation paths carry a directory prefix so
    that cannot happen today — confirm if path handling changes.

    :return: list of image paths without annotations
    """
    unrelated_image = []
    an_path = get_all_file_from_dir(ANNOTATION_PATH)
    image_path = get_all_file_from_dir(IMAGE_PATH)
    for p in image_path:
        name = p.split('\\')[-1].split('.')[0]
        is_related = False
        for a in an_path:
            if a.find(name) > 0:
                is_related = True
                break  # fix: stop scanning once related (was always O(n*m))
        if not is_related:
            unrelated_image.append(p)
    return unrelated_image
def main():
    """Rotate every image under HORIZONTAL_PATH by 90 degrees in place,
    printing a progress marker every 1000 files."""
    for index, path in enumerate(get_all_file_from_dir(HORIZONTAL_PATH), start=1):
        if index % 1000 == 0:
            print(index)
        rotated = np.rot90(get_resize_image(path))
        save_img(rotated, path)
def load_annotation_info():
    """Parse every annotation XML under ANNOTATION_PATH.

    :return: list of parsed annotation objects, one per file
    """
    return [parse_xml_file(path)
            for path in get_all_file_from_dir(ANNOTATION_PATH)]
def load_image_info():
    """Load image info for every file under IMAGE_PATH.

    Bug fix: the name->path map was built and then discarded — the function
    returned the raw path list.  It now returns the map, matching the
    sibling `load_image_info(dir)` helper and its own docstring/name.

    :return: dict of image file name -> full path
    """
    image_paths = get_all_file_from_dir(IMAGE_PATH)
    image_map = {}
    for p in image_paths:
        name = p.split('\\')[-1]
        image_map[name] = p
    return image_map
def main():
    """Resize every image under HORIZONTAL_PATH in place, printing the
    completed fraction every 100 files."""
    all_image_path = get_all_file_from_dir(HORIZONTAL_PATH)
    length = len(all_image_path)
    for index, path in enumerate(all_image_path, start=1):
        if index % 100 == 0:
            print(str(index * 1.0 / length))
        save_img(get_resize_image(path), path)
def load_image_info(dir):
    """Map image base names to their full paths for every file in *dir*.

    :param dir: directory to scan (parameter name kept for callers even
        though it shadows the builtin)
    :return: dict of base name (no extension) -> path; later duplicates win
    """
    return {p.split('/')[-1].split('.')[0]: p
            for p in get_all_file_from_dir(dir)}
def main():
    """Run horizontal segmentation on every 6-char crop in the fixed
    source directory, printing progress every 100 files."""
    dir_path = 'E:\dataset/text_area\horizontal\horizontal_orig/'
    for i, p in enumerate(get_all_file_from_dir(dir_path)):
        if i % 100 == 0:
            print(i)
        label = os.path.split(p)[1].split('-')[0]
        if len(label) != 6:
            continue
        seg_horozontal(label, cv2.imread(p))
def main():
    """Copy every file from ori_path to des_path under a fresh unique name.

    Dead code removed: a 'seg' bucket index and a per-file label were
    computed but never used, and a manual `index += 1` fought the counter
    already supplied by enumerate.
    """
    paths = get_all_file_from_dir(ori_path)
    for index, p in enumerate(paths):
        new_name = str(index) + '-' + str(uuid.uuid4()) + '.jpg'
        shutil.copy(p, des_path + new_name)
def main():
    """Convert each labelme JSON under JSON_DIR (gbk-encoded) into an
    image plus a txt annotation file."""
    for i, p in enumerate(get_all_file_from_dir(JSON_DIR)):
        print(i)
        print(p)
        _, file_name = os.path.split(p)
        print(file_name)
        obj = json.loads(read_all_content(p, encoding='gbk'))
        save_image(obj['imageData'], i, file_name)
        save_txt(obj['shapes'], i, file_name)
def delete_6():
    """Move every file whose label (name prefix before '-') contains an
    uppercase letter into DES_PATH, printing each moved path."""
    for p in get_all_file_from_dir(PARENT_DIR_PATH):
        _, name = os.path.split(p)
        label = name.split('-')[0]
        if any(c in label for c in 'QWERTYUIOPLKJHGFDSAZXCVBNM'):
            print(p)
            shutil.move(p, DES_PATH + name)
def get_file_mapping(files_path, file_type='jpg'):
    """Map base names to full paths for files whose path contains
    *file_type*.

    :param files_path: directory to scan
    :param file_type: substring selecting files (e.g. 'jpg', 'txt')
    :return: dict of base name (no extension) -> path
    """
    candidates = get_all_file_from_dir(files_path)
    return {
        os.path.split(p)[1].split('.')[0]: p
        for p in candidates
        if file_type in p
    }
def main():
    """Extract the embedded image from every .json file under JSON_DIR."""
    for i, p in enumerate(get_all_file_from_dir(JSON_DIR)):
        if '.json' not in p:
            continue
        print(i)
        print(p)
        file_name = os.path.split(p)[1].replace('.json', '')
        print(file_name)
        obj = json.loads(read_all_content(p))
        save_image(obj['imageData'], i, file_name)
def move():
    """Sweep sub-directories of PARENT_DIR_PATH that hold at most 4 files
    and move their contents into DES_PATH.

    Bug fix: every file used to be moved to DES_PATH + <name of the FIRST
    file in the directory>, so the files overwrote one another at the
    destination.  Each file now keeps its own basename, matching the
    sibling cleanup scripts.
    """
    for dirpath, dirnames, filenames in os.walk(PARENT_DIR_PATH):
        for d in dirnames:
            try:
                sub_dir = os.path.join(dirpath, d)
                files = get_all_file_from_dir(sub_dir)
                if not files:
                    # empty dirs previously bailed out via IndexError
                    continue
                if len(files) <= 4:
                    for p in files:
                        _, file_name = os.path.split(p)
                        shutil.move(p, DES_PATH + file_name)
                    print('delete' + sub_dir)
            except Exception as e:
                print(e)
def main():
    """Convert every labelme .json under JSON_DIR into an image + txt pair.

    Fix: the bare `except: pass` silently swallowed every error (including
    KeyboardInterrupt).  Failures are now reported per file — matching the
    sibling scripts — while the batch still continues.  The dead
    `if i < 0: continue` guard was removed.
    """
    paths = get_all_file_from_dir(JSON_DIR)
    for i, p in enumerate(paths):
        if '.json' not in p:
            continue
        print(i)
        try:
            obj = json.loads(read_all_content(p))
            file_name = os.path.split(p)[1].split('.')[0]
            save_image(obj['imageData'], i, file_name)
            save_txt(obj['shapes'], i, file_name)
        except Exception as e:
            print(p)
            print(e)
def main():
    """Draw labelled text-line boxes over each image and save the overlay
    to the 'line' directory; files with unreadable points are reported."""
    txt_and_image_paths = 'D:/label_result_2020_3_10/label_result/箱门中文字识别/txt'
    des_line_dir = 'D:/label_result_2020_3_10/label_result/箱门中文字识别/line/'
    try:
        for i, p in enumerate(get_all_file_from_dir(txt_and_image_paths)):
            if '.jpg' not in p:
                continue
            img_name = os.path.split(p)[1].replace('.jpg', '')
            img = read_image(p)
            points, is_error = get_points(p.replace('.jpg', '.txt'))
            if is_error:
                print(p)
            overlay = get_detect_result(points, img)
            write_image(des_line_dir + img_name + '.jpg', overlay)
    except Exception as e:
        print(e)
def main():
    """Move the contents of near-empty (fewer than 2 files) sub-dirs of
    num_path into DES_path, keeping each file's own name; empty dirs are
    only reported.

    Dead code removed: an `is_delete` flag was computed from the first
    file's name but never read, and a commented-out delete call was
    dropped.
    """
    for dirpath, dirnames, filenames in os.walk(num_path):
        for d in dirnames:
            sub_dir = os.path.join(dirpath, d)
            files = get_all_file_from_dir(sub_dir)
            if len(files) == 0:
                print('delete' + sub_dir)
                continue
            if len(files) < 2:
                for p in files:
                    _, file_name = os.path.split(p)
                    shutil.move(p, DES_path + file_name)
int(float(points[7]))]] } shapes.append(shape) return shapes def load_image_str(image_path): with open(image_path, "rb") as imageFile: image_s = base64.b64encode(imageFile.read()) image_s = str(image_s).replace('b\'', '') image_s = str(image_s).replace('\'', '') return str(image_s) if __name__ == '__main__': paths = get_all_file_from_dir(IMAGE_LABEL_DIR) image_mapping = get_mapping(paths, file_type='jpg') txt_mapping = get_mapping(paths, file_type='txt') txt_mapping_keys = txt_mapping.keys() index = 1 for key in image_mapping.keys(): if key in txt_mapping_keys: try: shapes = load_shapes(txt_mapping[key]) image_str = load_image_str(image_mapping[key]) json_obj = { "flags": {}, "shapes": shapes, "lineColor": [0, 255, 0, 128], "fillColor": [255, 0, 0, 128], "imagePath": "",
------------------------------------------------- File Name: delete_empty_txt Description : Author : 'li' date: 2018/10/14 ------------------------------------------------- Change Activity: 2018/10/14: ------------------------------------------------- """ import os from utility.file_path_utility import get_all_file_from_dir DIR_PATH = 'E:\dataset\detection/training_data/' paths = get_all_file_from_dir(DIR_PATH) for p in paths: is_delete = False if p.find('txt') > 0: with open(p, encoding='utf8', mode='r') as file: lines = file.readlines() if len(lines) == 0: is_delete = True if is_delete: _dir, name = os.path.split(p) new_name = name.replace('txt', 'jpg') try: os.remove(p) os.replace(DIR_PATH + name) except Exception as e:
------------------------------------------------- Change Activity: 2018/8/31: ------------------------------------------------- """ import json import os from utility.file_io_utility import read_all_content from utility.file_path_utility import get_all_file_from_dir, create_dir from xml.dom.minidom import Document __author__ = 'li' result_dir_path = 'C:/Users\lr\Desktop/123' result_paths = get_all_file_from_dir(result_dir_path) save_annotation_dir = './xml/' create_dir(save_annotation_dir) def load_result(result_paths): """ load result :param result_paths: :return: """ for p in result_paths: if p.find('txt') > 0: with open(p, mode='r', encoding='utf8') as file: lines = file.readlines() if len(lines) == 0:
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
   File Name:     __init__.py
   Description :
   Author :       'li'
   date:          2018/8/6
-------------------------------------------------
   Change Activity:
                   2018/8/6:
-------------------------------------------------
"""
import os
import shutil

from utility.file_path_utility import get_all_file_from_dir

dir_path = 'E:\dataset\\11-25\horizontal/'
des_path = 'C:\\Users\lr\Desktop\small/'

# Move every file smaller than 700 bytes into the "small" directory,
# printing each moved path.
for path in get_all_file_from_dir(dir_path):
    if os.path.getsize(path) < 700:
        print(path)
        shutil.move(path, des_path + os.path.split(path)[1])
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
   File Name:     copy_hang_image
   Description :
   Author :       'li'
   date:          2018/8/18
-------------------------------------------------
   Change Activity:
                   2018/8/18:
-------------------------------------------------
"""
import shutil
import uuid

from utility.file_path_utility import get_all_file_from_dir

__author__ = 'li'

IMAGE_DIR = 'C:/Users\lr\Desktop\error/'
HANG_DIR = 'C:/Users\lr\Desktop\error_img/'

# Copy every image (skipping txt/DMG artefacts) under a fresh uuid name.
# Dead code removed: an `index` counter was incremented but never read.
imgs = get_all_file_from_dir(IMAGE_DIR)
for i in imgs:
    if i.find("txt") >= 0 or i.find("DMG") >= 0:
        continue
    new_path = HANG_DIR + str(uuid.uuid4()) + '.jpg'
    shutil.copyfile(i, new_path)
------------------------------------------------- Change Activity: 2018/8/1: ------------------------------------------------- """ import shutil import cv2 from utility.file_path_utility import get_all_file_from_dir __author__ = 'li' """ 把不同形状的训练集做区分:水平的和垂直的。 """ ORIGINAL_PATH = 'F:/dataset/container_dataset/text_area/' HORIZONTAL_PATH = 'F:/dataset/container_dataset/text_area_horizontal/' VERTICAL_PATH = 'F:/dataset/container_dataset/text_area_vertical/' all_image_path = get_all_file_from_dir(ORIGINAL_PATH) for path in all_image_path: if path.find('.jpg') <= 0: continue img = cv2.imread(path) shape = img.shape image_name = path.split('\\')[-1] if shape[0] >= shape[1]: # 高大于宽 shutil.copy(path, VERTICAL_PATH + image_name) continue shutil.copy(path, HORIZONTAL_PATH + image_name)
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
   File Name:     data_validate
   Description :
   Author :       'li'
   date:          2018/9/9
-------------------------------------------------
   Change Activity:
                   2018/9/9:
-------------------------------------------------
"""
import os

from utility.file_path_utility import get_all_file_from_dir

__author__ = 'li'

images_path = 'F:\dataset\horizontal'
paths = get_all_file_from_dir(images_path)

# Report any image whose label contains a character outside the expected
# alphabet (digits, uppercase letters, '#'); one report per bad character.
for p in paths:
    _, image_name = os.path.split(p)
    label = image_name.split('-')[0]
    for c in label:
        if c not in '1234567890QWERTYUIOPLKJHGFDSAZXCVBNM#':
            print(p)
File Name: validate_gen_txt Description : Author : 'li' date: 2018/10/14 ------------------------------------------------- Change Activity: 2018/10/14: ------------------------------------------------- """ import numpy as np from utility.file_path_utility import get_all_file_from_dir TXT_DIR = 'E:\dataset\detection/training_data/' paths = get_all_file_from_dir(TXT_DIR) def polygon_area(poly): ''' compute area of a polygon :param poly: :return: ''' edge = [(poly[1][0] - poly[0][0]) * (poly[1][1] + poly[0][1]), (poly[2][0] - poly[1][0]) * (poly[2][1] + poly[1][1]), (poly[3][0] - poly[2][0]) * (poly[3][1] + poly[2][1]), (poly[0][0] - poly[3][0]) * (poly[0][1] + poly[3][1])] return np.sum(edge) / 2.
date: 2018/7/29 ------------------------------------------------- Change Activity: 2018/7/29: ------------------------------------------------- """ import shutil from utility.file_path_utility import get_all_file_from_dir __author__ = 'li' img_path = 'F:/dataset/second_label/image/' annotation_path = 'F:/dataset/second_label/detection_label/' des_path = 'F:/dataset/second_label/des/' img_path = get_all_file_from_dir(img_path) annotation_path = get_all_file_from_dir(annotation_path) def load_map(img_path): map = {} for path in img_path: name = path.split('\\')[-1].split('.')[0] map[name] = path return map img_map = load_map(img_path) anno_map = load_map(annotation_path) i = 1 for key in anno_map.keys():
------------------------------------------------- File Name: six_image_move Description : Author : 'li' date: 2018/9/23 ------------------------------------------------- Change Activity: 2018/9/23: ------------------------------------------------- """ import os import shutil from utility.delete_file import delete_file_in_dir from utility.file_path_utility import get_all_file_from_dir __author__ = 'li' PARENT_DIR_PATH = 'E:\dataset/new_seg/6/' DES_DIR_PATH = 'E:\dataset\six_uncheck/' for dirpath, dirnames, filenames in os.walk(PARENT_DIR_PATH): for d in dirnames: sub_dir = os.path.join(dirpath, d) files = get_all_file_from_dir(sub_dir) if len(files) <= 2: for f in files: dir_p, name = os.path.split(f) print(f) shutil.move(f, DES_DIR_PATH + name)