Пример #1
0
def run():
    """Run the image preprocessing job over the dataset in ``IMAGENET_DIR``.

    Steps, in order:

    1. Print the active configuration (``RESIZE``, ``IMAGENET_DIR``,
       ``VISIBLE_SUBDIRS``, ``DEBUG``).
    2. Load (or rebuild, when ``REBUILD_METADATA`` is set) the dataset
       metadata via ``Metadata.load_or_make``.
    3. Split the classes into chunks with ``chunkify_classes`` and hand each
       chunk to ``process`` -- in the current process when ``DEBUG`` is on,
       otherwise in a ``multiprocessing`` pool of at most ``MAX_N_PROCESS``
       workers.
    4. Sum the per-chunk ``(saved, passed, error_msgs)`` results and print a
       one-line summary.
    5. Write every error message to ``errors.txt`` inside
       ``<parent of IMAGENET_DIR>/<dataset>_<NEW_DIR_POSTFIX>_<RESIZE...>/``
       and print the first 10 lines of that file.

    Returns:
        None. All results are reported through stdout and the log file.
    """
    print("Image preprocessing with multi-workers.")
    print(f"RESIZE: {RESIZE}")
    print(f"IMAGENET_DIR: {IMAGENET_DIR}")
    print(f"VISIBLE_SUB_DIRS: {VISIBLE_SUBDIRS}")
    print(f"DEBUG_MODE: {'ON' if DEBUG else 'OFF'}")

    print("Warming up tqdm.")

    for _ in tqdm(range(10)):
        time.sleep(0.1)

    print('\nScanning dirs..')
    meta = Metadata.load_or_make(
        remake=REBUILD_METADATA,
        data_dir=IMAGENET_DIR,
        # Debug runs only look at the 'val' sub-directory.
        visible_subdirs=['val'] if DEBUG else VISIBLE_SUBDIRS,
        extensions=IMG_EXTENSIONS,
    )

    n_classes = len(meta.idx_to_samples)
    n_images = sum(len(v) for v in meta.idx_to_samples.values())
    print(f'Done. {n_classes} classes and {n_images} images are found.')

    num_process = 1 if DEBUG else min(mp.cpu_count(), MAX_N_PROCESS)
    chunks = chunkify_classes(meta, num_process)

    if DEBUG:
        print('Start single processing for debugging.')
        results = [process(chunks[0])]  # for debugging
    else:
        print(f'Start {num_process} processes.')
        # The context manager terminates the workers if map() raises, so no
        # processes are leaked on failure; on success close()/join() still
        # let the workers shut down normally before the pool is torn down.
        with mp.Pool(processes=num_process) as pool:
            results = pool.map(process, chunks)
            pool.close()  # no more task
            pool.join()  # wrap up current tasks

    print("Preprocessing completed.")
    # NOTE(review): presumably skips past one progress-bar line per worker
    # -- confirm against what `process` prints.
    print('\n' * num_process)

    saved_total = 0
    passed_total = 0
    error_msgs_total = []
    for saved, passed, error_msgs in results:
        saved_total += saved
        passed_total += passed
        error_msgs_total.extend(error_msgs)
    print(f"[!] {saved_total} saved file(s) / "
          f"{passed_total} ignored (already exist) file(s) / "
          f"{len(error_msgs_total)} error(s).")

    # log errors
    logfile_name = 'errors.txt'
    base = path.normpath(path.join(IMAGENET_DIR, os.pardir))
    dataset = path.basename(IMAGENET_DIR)
    dataset = "_".join([dataset, NEW_DIR_POSTFIX, *map(str, RESIZE)])
    log_dir = path.join(base, dataset)
    logfile_path = path.join(log_dir, logfile_name)
    # The directory may be missing (e.g. nothing was processed); create it so
    # the final report cannot fail on a missing folder.
    os.makedirs(log_dir, exist_ok=True)
    with open(logfile_path, 'w') as f:
        for i, error_msg in enumerate(error_msgs_total):
            f.write(f'[Error {i}] {error_msg}\n')
    print(f"Error messages logged in {logfile_path}. "
          "Top 10 lines are as follows:")
    # Read the file back in Python instead of shelling out to `head`: this is
    # portable and safe for paths containing spaces or shell metacharacters.
    with open(logfile_path) as f:
        # zip() stops after 10 items without consuming an 11th line.
        for _, line in zip(range(10), f):
            print(line, end='')