예제 #1
0
def create_jpg_folder(root_path_list):
    """Move every image into a ``jpg`` sub-folder next to it.

    For each ``(path, name)`` pair yielded by ``image_path_generator``, the
    file ``path/name`` is moved to ``path/jpg/name``. The ``jpg`` folder is
    created on demand.

    Args:
        root_path_list: Passed through unchanged to ``image_path_generator``.

    Raises:
        OSError: If the folder cannot be created or the file cannot be moved.
    """
    for path, name in image_path_generator(root_path_list):
        jpg_folder = os.path.join(path, "jpg")
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(jpg_folder, exist_ok=True)
        shutil.move(os.path.join(path, name), os.path.join(jpg_folder, name))
예제 #2
0
def image_matching_inspect(root_path_list):
    """Return the images that have no matching XML annotation file.

    For an image ``path/name`` the annotation is looked up at
    ``path/xml/<stem>.xml``, where ``<stem>`` is ``name`` without its
    extension.

    Args:
        root_path_list: Passed through unchanged to ``image_path_generator``.

    Returns:
        list[str]: Full paths of the images whose annotation file is missing.
    """
    image_without_xml_list = []
    for path, name in image_path_generator(root_path_list):
        # Swap only the real extension: a plain str.replace would also touch
        # a ".jpg" in the middle of the name and would miss ".jpeg"/".Jpg".
        xml_name = os.path.splitext(name)[0] + '.xml'
        xml_path = os.path.join(path, "xml", xml_name)
        if not os.path.exists(xml_path):
            image_without_xml_list.append(os.path.join(path, name))
    return image_without_xml_list
예제 #3
0
def image_rename(root_path_list):
    """Normalise JPEG file extensions to lowercase ``.jpg``.

    Files whose extension is any case variant of ``.jpg`` or ``.jpeg`` other
    than ``.jpg`` itself (for example ``.JPG``, ``.JPEG``, ``.jpeg``) are
    renamed in place. A summary line is printed when done.

    Args:
        root_path_list: Passed to ``get_image_number`` and
            ``image_path_generator``.

    Raises:
        OSError: If a rename fails.
    """
    length = get_image_number(root_path_list)
    rename_file_number = 0

    for path, name in tqdm(image_path_generator(root_path_list), total=length):
        stem, ext = os.path.splitext(name)
        if ext == '.jpg' or ext.lower() not in ('.jpg', '.jpeg'):
            continue
        rename_file_number += 1
        # Rebuild the target from the stem so that only the file's own
        # extension changes; a str.replace on the full path would also
        # rewrite a ".JPG" inside a directory name or the middle of the name.
        # NOTE: on POSIX, os.rename silently replaces an existing target.
        os.rename(os.path.join(path, name), os.path.join(path, stem + '.jpg'))

    print("There are {}/{} images have be renamed".format(rename_file_number, length))
예제 #4
0
def postfix_checkup(root_path_list):
    """Report files whose extension is not ``jpg``/``jpeg`` (case-insensitive).

    Every offending file is recorded as ``<parent of its folder>,<file stem>``
    and the records are written, one per line, to
    ``../result/postfix_invalid_images.txt``. A summary line is printed at
    the end.

    Args:
        root_path_list: Passed to ``get_image_number`` and
            ``image_path_generator``.

    Raises:
        OSError: If the report file cannot be opened for writing.
    """
    postfix_invalid_list = []
    length = get_image_number(root_path_list)

    for path, name in tqdm(image_path_generator(root_path_list), total=length):
        # splitext keeps dots inside the stem ("a.b.png" -> "a.b") and treats
        # a name without any dot as having no extension.
        stem, ext = os.path.splitext(name)
        if ext.lower() in ('.jpg', '.jpeg'):
            continue
        # Cut at the LAST separator ('/' or '\\'). Searching for the last
        # component with str.find hits its first occurrence, which is wrong
        # when the folder name also appears earlier in the path.
        sep_index = max(path.rfind('/'), path.rfind('\\'))
        sub_root_path = path[:sep_index] if sep_index >= 0 else ''
        postfix_invalid_list.append(f'{sub_root_path},{stem}')
    with open("../result/postfix_invalid_images.txt", 'w', encoding='utf-8') as f:
        f.write('\n'.join(postfix_invalid_list))
    print("There are {}/{} images have invalid postfix".format(len(postfix_invalid_list), length))
예제 #5
0
def dup_hash(root_path_list):
    """Group images by the digest returned by ``image_md5`` and dump to JSON.

    The mapping ``digest -> [(folder, filename), ...]`` is written to
    ``../../result/new_dup_hash.json``. The total image count and the number
    of distinct digests are printed afterwards.

    Args:
        root_path_list: Passed to ``get_image_number`` and
            ``image_path_generator``.
    """
    total = get_image_number(root_path_list)
    print("正在计算图片哈希值……")
    groups = {}
    progress = tqdm(image_path_generator(root_path_list), total=total)
    for folder, filename in progress:
        digest = image_md5(os.path.join(folder, filename))
        # Files sharing a digest end up in the same bucket.
        groups.setdefault(digest, []).append((folder, filename))
    with open("../../result/new_dup_hash.json", "w") as out_file:
        json.dump(groups, out_file, indent=4)
    print("总共{}张图片,去重后图片数量为{}".format(total, len(groups)))
예제 #6
0
def rm_orientation(root_path_list):
    """Strip the EXIF Orientation tag from images and save them in place.

    Images without EXIF data, or without an Orientation entry in the ``0th``
    IFD, are left untouched. The path of every rewritten image is printed.

    Args:
        root_path_list: Passed to ``get_image_number`` and
            ``image_path_generator``.
    """
    length = get_image_number(root_path_list)
    for path, name in tqdm(image_path_generator(root_path_list), total=length):
        image_path = os.path.join(path, name)
        # The context manager releases the file handle even when the image
        # is skipped or an error is raised.
        with Image.open(image_path) as img:
            if "exif" not in img.info:
                continue
            exif_dict = piexif.load(img.info["exif"])
            if piexif.ImageIFD.Orientation not in exif_dict['0th']:
                continue
            print(image_path)
            del exif_dict["0th"][piexif.ImageIFD.Orientation]
            # NOTE(review): Exif tag 41729 (SceneType) is forced to b'1'
            # before dumping -- presumably a workaround for piexif.dump
            # rejecting the value type loaded for this tag; confirm.
            exif_dict['Exif'][41729] = b'1'
            exif_bytes = piexif.dump(exif_dict)
            img.save(image_path, exif=exif_bytes)
예제 #7
0
def size_checkup_fix(root_path_list, annotations_folder='Annotations', images_folder='JPEGImages'):
    """Sync annotation sizes with the real images and list out-of-range boxes.

    For every image the matching XML file is located by replacing
    ``images_folder`` with ``annotations_folder`` in the image folder path
    and swapping the file extension for ``.xml``. If the ``<size>`` width or
    height differs from the actual image size, the XML is rewritten with the
    image's dimensions. Afterwards each ``<object>/<bndbox>`` is checked
    against the image size. Images with at least one box outside the image
    are recorded as ``<parent of its folder>,<file stem>`` and written, one
    per line, to ``../result/select_by_classes.txt``.

    Args:
        root_path_list: Passed to ``get_image_number`` and
            ``image_path_generator``.
        annotations_folder: Folder name that holds the XML files.
        images_folder: Folder name that holds the images.

    Raises:
        OSError: If an annotation, image or the report file cannot be opened.
    """
    length = get_image_number(root_path_list)
    wrong_annotations = []
    for path, name in tqdm(image_path_generator(root_path_list), total=length):
        image_path = os.path.join(path, name)
        xml_dir = path.replace(images_folder, annotations_folder)
        # Swap only the real extension (also covers ".jpeg" and mixed case).
        xml_path = os.path.join(xml_dir, os.path.splitext(name)[0] + '.xml')
        with open(xml_path, encoding='utf-8') as f:
            tree = ET.parse(f)
        tree_root = tree.getroot()
        size_element = tree_root.find('size')
        width_element = size_element.find('width')
        height_element = size_element.find('height')
        with Image.open(image_path) as im:
            img_width, img_height = im.size
        if int(width_element.text) != img_width or int(height_element.text) != img_height:
            width_element.text = str(img_width)
            height_element.text = str(img_height)
            tree.write(xml_path)
        # The in-memory tree already matches what is on disk, so there is no
        # need to parse the file a second time.
        for obj in tree_root.iter('object'):
            bndbox_element = obj.find("bndbox")
            xmin = int(bndbox_element.find('xmin').text)
            xmax = int(bndbox_element.find('xmax').text)
            ymin = int(bndbox_element.find('ymin').text)
            ymax = int(bndbox_element.find('ymax').text)
            # Validate against the real image size; the width/height read
            # from the XML may be the stale values that were just corrected.
            if xmin < 0 or xmax > img_width or ymin < 0 or ymax > img_height:
                # Cut at the LAST separator; str.find on the last component
                # would hit an earlier occurrence of the same folder name.
                sep_index = max(path.rfind('/'), path.rfind('\\'))
                sub_root_path = path[:sep_index] if sep_index >= 0 else ''
                stem = os.path.splitext(name)[0]
                wrong_annotations.append(f'{sub_root_path},{stem}')
                break
    # Print summary statistics.
    print(f"{len(wrong_annotations)}/{length} files have wrong annotation")
    # Write the report file.
    # NOTE(review): the file name does not describe this report and may have
    # been copied from another check; kept unchanged in case other tooling
    # reads it -- confirm.
    with open("../result/select_by_classes.txt", 'w', encoding='utf-8') as f:
        f.write('\n'.join(wrong_annotations))
예제 #8
0
def valid_checkup(root_path_list):
    """Report images that cannot be read or decoded.

    Each file is read as raw bytes and decoded with ``cv.imdecode``. Files
    that fail to open, fail to decode, or raise any error while doing so are
    recorded as ``<parent of its folder>,<file stem>`` and written, one per
    line, to ``../result/checkup_broken_images.txt``. A summary line is
    printed at the end.

    Args:
        root_path_list: Passed to ``get_image_number`` and
            ``image_path_generator``.

    Raises:
        OSError: If the report file cannot be opened for writing.
    """
    broken_list = []
    length = get_image_number(root_path_list)

    for path, name in tqdm(image_path_generator(root_path_list), total=length):
        image_path = os.path.join(path, name)
        try:
            with open(image_path, 'rb') as f:
                value_buf = f.read()
            img_np = np.frombuffer(value_buf, np.uint8)
            # Use the constant through the cv namespace: a bare IMREAD_COLOR
            # that is not imported would raise NameError, which the handler
            # below would turn into "every image is broken".
            img = cv.imdecode(img_np, cv.IMREAD_COLOR)
            # imdecode signals undecodable data by returning None.
            is_broken = img is None
        except Exception:
            # Best effort: any read/decode error marks the file as broken
            # instead of aborting the whole scan.
            is_broken = True
        if is_broken:
            # Cut at the LAST separator; str.find on the last component
            # would hit an earlier occurrence of the same folder name.
            sep_index = max(path.rfind('/'), path.rfind('\\'))
            sub_root_path = path[:sep_index] if sep_index >= 0 else ''
            stem = os.path.splitext(name)[0]
            broken_list.append(f'{sub_root_path},{stem}')
    with open("../result/checkup_broken_images.txt", 'w', encoding='utf-8') as f:
        f.write('\n'.join(broken_list))
    print("There are {}/{} images broken".format(len(broken_list), length))
예제 #9
0
def get_image_number(root_path_list):
    """Count the ``(path, name)`` pairs yielded by ``image_path_generator``.

    The generator is handed a copy of ``root_path_list`` rather than the
    caller's list (presumably because the generator may modify its
    argument -- confirm against ``image_path_generator``).

    Args:
        root_path_list: Object with a ``copy()`` method; the copy is what
            gets passed to ``image_path_generator``.

    Returns:
        int: Number of pairs produced by the generator.
    """
    paths_snapshot = root_path_list.copy()
    count = 0
    for _path, _name in image_path_generator(paths_snapshot):
        count += 1
    return count