def call_dir(model: callable, src: Path, dst: Path, min_siz: int = 50,
             bs: int = None, parts: int = None, stem: str = None,
             verbose: bool = False, step: int = 100):
    """ Index image representations

        Args:
            model: model for representation
            src: source directory of images
            dst: destination directory for indexing
            min_siz: minimum image size
            bs: batch size; defaults to one batch holding every image
            parts: partition data
            stem: file name
            verbose: logging flag
            step: step to log after
    """
    paths_list = [pt for pt in src.iterdir() if pt.suffix in IMG_EXTS]
    # Bug fix: the original used bs directly in slicing / range stepping,
    # which raises TypeError when bs is left at its None default.
    # Fall back to a single batch covering all images.
    batch = bs if bs else max(len(paths_list), 1)
    paths = [paths_list[i:i + batch] for i in range(0, len(paths_list), batch)]
    # Bug fix: the original message said "images" but len(paths) counts batches.
    logger.print_texts(verbose, f'there are {len(paths_list)} images to index')
    # Group batches into chunks so each chunk is serialized separately.
    chunks_bt = [paths] if parts is None else [paths[i:i + parts]
                                              for i in range(0, len(paths), parts)]
    for sidx, chunk in enumerate(chunks_bt):
        vec_bts = list(_encode_all(model, *chunk, min_siz=min_siz,
                                   verbose=verbose, step=step))
        vecs = listify_results(vec_bts)
        part_dst = _prepare_dir(dst, sidx, parts=parts, stem=stem)
        _dump_data(str(part_dst), vecs, verbose=verbose)
        logger.print_texts(verbose, f'chunk {sidx + 1} is indexed')
        # Release per-chunk buffers before the next (potentially large) chunk
        # to keep peak memory down.
        del vec_bts
        del vecs
def _dump_data(dst: str, reprs: list, verbose: bool = True):
    """ Serialize vectors in file

        Args:
            dst: destination file path
            reprs: vectors to serialize
            verbose: logging flag
    """
    with open(dst, 'wb') as vecs:
        pkl.dump(reprs, vecs)
    # Bug fix: the original message referenced a non-existent variable
    # name 'full_dicts'; report the actual serialized list.
    logger.print_texts(verbose, f'Saved len(reprs) = {len(reprs)}')
def load_dbvecs(vectors_file: str, verbose: bool = True):
    """ Read serialized vectors

        Args:
            vectors_file: directory to store vectors
            verbose: logging flag
        Returns:
            images_dict: list of dictionaries of vectorized images
    """
    # NOTE(review): pickle deserialization can execute arbitrary code;
    # only load vector files produced by this project's indexer.
    with open(vectors_file, 'rb') as stream:
        images_dict = pkl.load(stream)
    logger.print_texts(verbose, f'{len(images_dict)} vectors are extracted')
    return images_dict
def dump(src_files: list, dst_dir: Path, func_valid: callable = lambda x: x,
         verbose: bool = False):
    """ Writes files in to the destination directory

        Args:
            src_files: source files to dump
            dst_dir: destination directory
            func_valid: validation function for source file
            verbose: logging flag
    """
    for idx, src_file in enumerate(src_files):
        src_file_txt = str(src_file)
        # Only copy files that pass the caller-supplied validation.
        if func_valid(src_file_txt):
            dst_file = dst_dir / src_file.name
            # copy2 preserves file metadata (timestamps, permissions).
            shutil.copy2(src_file_txt, str(dst_file))
        # Bug fix: progress message typo — 'data if' -> 'data of'.
        logger.print_texts(verbose, f'{idx} data of {len(src_files)} is processed')
def generate_data(src_root: Path, dst_root: Path, h: int = 224, w: int = 224,
                  tr_dir: str = 'train', val_dir: str = 'valid',
                  tst_dir: str = None, lazy_read: bool = False,
                  verbose: bool = True):
    """ Generate rotation data-set

        Args:
            src_root: source root directory
            dst_root: destination root directory
            h: height of image
            w: width of image
            tr_dir: training directory
            val_dir: validation directory
            tst_dir: test directory
            lazy_read: lazy load of classes
            verbose: logging flag
    """
    src_dirs = [src_root / tr_dir, src_root / val_dir]
    dst_dirs = [dst_root / tr_dir, dst_root / val_dir]
    logger.print_texts(
        verbose,
        f'train and validation directories src_dirs = {src_dirs}, dst_dirs = {dst_dirs}'
    )
    # Mutates src_dirs / dst_dirs in place when a test directory is configured.
    _add_tests(src_root, dst_root, src_dirs, dst_dirs, tst_dir=tst_dir,
               verbose=verbose)
    # Rotation matrices are shared across all splits; compute them once.
    tr_mx = _init_rotation_matrix(h=h, w=w)
    for idx, src_dir in enumerate(src_dirs):
        if src_dir.exists():
            dst_dir = dst_dirs[idx]
            # Robustness fix: also create dst_root when it does not exist yet;
            # the original mkdir(exist_ok=True) raised FileNotFoundError then.
            dst_dir.mkdir(parents=True, exist_ok=True)
            generate_classes(src_dir, dst_dir, h=h, w=w, tr_mx=tr_mx,
                             lazy_read=lazy_read, verbose=verbose)
def search_dir(model: Encoder, paths: list, db_vecs: list = None,
               index: Path = None, n_results: int = None,
               verbose: bool = False) -> tuple:
    """ Search files and extract

        Args:
            model: model for representation
            paths: path of images to search
            db_vecs: database vectors
            index: index file
            n_results: number of results
            verbose: logging flag
        Returns:
            res_vecs: result images
            dbs_vecs: database vectors actually used for the search
    """
    res_vecs = list()
    src_vec_bts = list(_encode(model, path) for path in paths)
    # Bug fix: the None-check must come before zip — the original tested
    # vec_bt after zipping it, so a None batch raised TypeError instead of
    # being skipped.
    src_vecs = [(vec, img, path)
                for vec_bt, img_bt, path_bt in src_vec_bts if vec_bt is not None
                for vec, img, path in zip(vec_bt, img_bt, path_bt)]
    logger.print_texts(verbose,
                       f'len(src_vecs) = {len(src_vecs)} query images are vectorized')
    if db_vecs:
        dbs_vecs = db_vecs
    else:
        dbs_vecs = load_dbvecs(str(index))
        # Bug fix: only claim a disk load when one actually happened.
        logger.print_texts(verbose, f'{len(dbs_vecs)} is loaded from disk')
    for idx, (vec1, img, pt) in enumerate(src_vecs):
        dists = search_img(vec1, dbs_vecs, n_results=n_results, verbose=verbose)
        res_vecs.append((img, dists, pt))
        logger.print_texts(
            verbose,
            f'result for {idx + 1} out of {len(src_vecs)} extracted , path - {pt}')
    # Bug fix: return the vectors actually used; the original returned the
    # raw db_vecs argument, which is None whenever vectors came from disk.
    return res_vecs, dbs_vecs
def _add_tests(src_root: Path, dst_root: Path, src_dirs: list, dst_dirs: list, tst_dir: str = None, verbose: bool = True): """ Add test directory to source and destination paths Args: src_root: source root dst_root: destination root src_dirs: source directories dst_dirs: destination directories tst_dir: test directory name verbose: logging flag """ if tst_dir: src_dirs += [src_root / tst_dir] dst_dirs += [dst_root / tst_dir] logger.print_texts( verbose, f'test directories src_dirs = {src_dirs}, dst_dirs = {dst_dirs}')
def _encode_all(model: callable, *paths: list, min_siz: int = 50,
                verbose: bool = False, step: int = 100) -> np.ndarray:
    """ Extract vectors from batches of images

        Args:
            model: representation extractor model
            paths: batches of image paths
            min_siz: minimum image size
            verbose: logging flag
            step: step to log after
        Yields:
            vecs: extracted vectors for the batch
            valid_paths: paths that produced a vector
    """
    ivld_cnt = 0
    processed = 0  # exact number of images seen so far
    for idx, path in enumerate(paths):
        vecs, _, valid_paths = _encode(model, *path, min_siz=min_siz)
        img_diff = len(path) - len(valid_paths)
        ivld_cnt += max(img_diff, 0)
        processed += len(path)
        _log_diff(img_diff, ivld_cnt, verbose)
        if valid_paths:
            # Bug fix: the original computed the valid count as
            # idx * len(path) - ivld_cnt, which is wrong whenever batch sizes
            # differ (e.g. the final partial batch); use the running total.
            logger.print_texts(
                verbose and idx % step == 0,
                f'{idx} data is processed, out of {len(paths)}, valid - {processed - ivld_cnt}')
            yield vecs, valid_paths
def generate_classes(src_dir: Path, dst_dir: Path, h: int = 224, w: int = 224,
                     tr_mx: dict = None, interpolation: int = cv2.INTER_LINEAR,
                     lazy_read: bool = False, verbose: bool = False):
    """ Generate rotation data-set for classification

        Args:
            src_dir: source directory
            dst_dir: destination directory
            h: height of image
            w: width of image
            tr_mx: rotation matrices
            interpolation: interpolation for resize
            lazy_read: lazy reading of image
            verbose: logging flag
    """
    dest_dirs = make_dirs(dst_dir)
    img_dict = {
        str(p.name): _image_data(p, h, w, interpolation, lazy_read=lazy_read)
        for p in src_dir.iterdir() if _valid_image(p)
    }
    ms = _init_rotation_matrix(h=h, w=w, tr_mx=tr_mx)
    logger.print_texts(
        verbose,
        f'generates classes for dst_dirs = {dst_dir} for rotations {ms}')
    # Bug fix: the original 'for idx, k, v in enumerate(...)' raised
    # ValueError — enumerate yields (index, (key, value)) pairs, so the
    # key/value tuple must be unpacked explicitly.
    for idx, (k, v) in enumerate(img_dict.items()):
        for d, m in ms.items():
            try:
                im = _read_resize(v, h, w, interpolation) if lazy_read else v
                # '0' is the identity rotation — write the image unchanged.
                # NOTE(review): cv2.warpAffine expects dsize as (width, height);
                # (h, w) only matches for square images — confirm for h != w.
                mod = im if d == '0' else cv2.warpAffine(im, m, (h, w))
                dst = dest_dirs[d]
                dst_file = str(dst / k)
                cv2.imwrite(dst_file, mod)
                logger.print_texts(verbose, f'writes {dst_file}')
            except Exception as ex:
                print(f'Error on rotating image {k} ', ex)
        logger.print_texts(verbose, f'{idx} out of {len(img_dict)}')
def _log_diff(img_diff: int, ivld_cnt: int, verbose: bool): if img_diff > 0: logger.print_texts(verbose, f'there are {ivld_cnt} invalid images')