def dataset_expand_landmarks(path_annots, path_dataset, nb_selected=None,
                             nb_total=None, nb_workers=NB_WORKERS):
    """ select and expand landmarks over the whole dataset

    :param str path_annots: root path to original dataset
    :param str path_dataset: root path to generated dataset
    :param float|int|None nb_selected: portion of selected points
    :param int|None nb_total: add extra points up to total number
    :param int nb_workers: number of jobs running in parallel
    :return list(int): number of landmarks per processed set
    """
    list_sets = list_sub_folders(path_annots)
    logging.info('Found sets: %i', len(list_sets))
    _wrap_extend = partial(extend_landmarks, path_dataset=path_dataset,
                           nb_selected=nb_selected, nb_total=nb_total)
    counts = list(iterate_mproc_map(_wrap_extend, sorted(list_sets),
                                    nb_workers=nb_workers, desc='expand landmarks'))
    return counts
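# Usage sketch (illustrative, not executed at import time): the directory names
# below are hypothetical placeholders for an annotation root and a dataset root.
#
#   counts = dataset_expand_landmarks(path_annots='data/annotations',
#                                     path_dataset='data/dataset',
#                                     nb_selected=0.5, nb_total=200)
#   logging.info('expanded landmark counts per set: %r', counts)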
def collect_triple_dir(paths_landmarks, path_dataset, path_out, coll_dirs=None,
                       scales=None, with_user=False):
    """ collect all sub-directories down to the scale level with user annotations

    expected annotation structure is <tissue>/<user>_scale-<number>pc/<csv-file>
    expected dataset structure is <tissue>/scale-<number>pc/<image>

    :param [str] paths_landmarks: paths to landmarks / annotations
    :param str path_dataset: path to the dataset with images
    :param str path_out: path for exporting statistics
    :param [{}] coll_dirs: list of already existing collections
    :param [int] scales: list of allowed scales
    :param bool with_user: whether the user info is required (as annotation)
    :return tuple([{}],[]): list of collected directory triples and an empty list

    >>> coll_dirs, d = collect_triple_dir([update_path('annotations')],
    ...                                   update_path('dataset'), 'output')
    >>> len(coll_dirs) > 0
    True
    >>> 'annotations' in coll_dirs[0]['landmarks'].split(os.sep)
    True
    >>> 'dataset' in coll_dirs[0]['images'].split(os.sep)
    True
    >>> 'output' in coll_dirs[0]['output'].split(os.sep)
    True
    >>> d
    []
    """
    if coll_dirs is None:
        coll_dirs = []
    for path_lnds in paths_landmarks:
        set_name, scale_name = path_lnds.split(os.sep)[-2:]
        scale = parse_path_user_scale(scale_name)[1] \
            if with_user else parse_path_scale(scale_name)
        # if a scale was not recognised in the last folder name, recurse one level deeper
        if np.isnan(scale):
            sub_dirs = list_sub_folders(path_lnds)
            coll_dirs, sub_dirs = collect_triple_dir(sub_dirs, path_dataset, path_out,
                                                     coll_dirs, scales, with_user)
            continue
        # skip a particular scale if it is not among the chosen ones
        if scales is not None and scale not in scales:
            continue
        coll_dirs.append({
            'landmarks': path_lnds,
            'images': os.path.join(path_dataset, set_name,
                                   TEMPLATE_FOLDER_SCALE % scale),
            'output': os.path.join(path_out, set_name, scale_name),
        })
    return coll_dirs, []
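# Usage sketch (illustrative): collecting landmark/image/output triples for
# user-annotated folders restricted to selected scales; the paths and scale
# values are hypothetical examples, not part of the repository.
#
#   colls, _ = collect_triple_dir(['data/annotations/lung-lesion_1'],
#                                 'data/dataset', 'output',
#                                 scales=[5, 10], with_user=True)
#   for coll in colls:
#       print(coll['landmarks'], '->', coll['output'])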
def load_largest_scale(path_set):
    """ in a given set find the largest scale and load all landmarks in full size

    :param str path_set: path to image/landmark set
    :return dict: dictionary of ndarray with loaded landmarks in full scale
    """
    scales_folders = [(parse_path_scale(p), os.path.basename(p))
                      for p in list_sub_folders(path_set)]
    if not scales_folders:
        return None
    # pick the folder with the highest scale
    scale, folder = sorted(scales_folders, reverse=True)[0]
    paths_csv = glob.glob(os.path.join(path_set, folder, '*.csv'))
    # rescale landmarks from the chosen scale (in percent) to full size
    scaling = 100. / scale
    names_lnds = {os.path.basename(p): load_landmarks_csv(p) * scaling
                  for p in paths_csv}
    return names_lnds
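# Usage sketch (illustrative): loading full-resolution landmarks from the
# largest available scale of a hypothetical set folder.
#
#   lnds_full = load_largest_scale('data/dataset/lung-lesion_1')
#   if lnds_full:
#       for name, lnds in lnds_full.items():
#           print(name, lnds.shape)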
def dataset_scale_landmarks(path_dataset, scales=DEFAULT_SCALES, nb_workers=NB_WORKERS):
    """ generate several scales within the same dataset

    :param str path_dataset: path to the source/generated dataset
    :param [int] scales: scales to be created
    :param int nb_workers: number of jobs running in parallel
    :return list: results of scaling the landmarks per set
    """
    list_sets = list_sub_folders(path_dataset)
    logging.info('Found sets: %i', len(list_sets))
    _wrap_scale = partial(scale_set_landmarks, scales=scales)
    counts = list(iterate_mproc_map(_wrap_scale, sorted(list_sets),
                                    nb_workers=nb_workers, desc='scaling sets'))
    return counts
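# Usage sketch (illustrative): scaling landmarks of the generated dataset down
# to a hypothetical subset of scales with a couple of parallel workers.
#
#   counts = dataset_scale_landmarks('data/dataset', scales=[5, 10, 25],
#                                    nb_workers=2)
#   logging.info('scaled sets: %i', len(counts))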