Example #1
def dataset_expand_landmarks(path_annots,
                             path_dataset,
                             nb_selected=None,
                             nb_total=None,
                             nb_workers=NB_WORKERS):
    """ select and expand over whole dataset

    :param str path_annots: root path to original dataset
    :param str path_dataset: root path to generated dataset
    :param float|int|None nb_selected: fraction (float) or number (int) of landmarks to select
    :param int|None nb_total: add extra points up to total number
    :param int nb_workers: number of jobs running in parallel
    :return list(int): number of landmarks in each set
    """
    list_sets = list_sub_folders(path_annots)
    logging.info('Found sets: %i', len(list_sets))

    _wrap_extend = partial(extend_landmarks,
                           path_dataset=path_dataset,
                           nb_selected=nb_selected,
                           nb_total=nb_total)
    counts = list(
        iterate_mproc_map(_wrap_extend,
                          sorted(list_sets),
                          nb_workers=nb_workers,
                          desc='expand landmarks'))
    return counts
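A minimal usage sketch (not from the original source), assuming the helpers and constants used above (NB_WORKERS, list_sub_folders, extend_landmarks, iterate_mproc_map) are available from the surrounding module; all paths and parameter values are hypothetical placeholders:

# hypothetical call: keep 60% of the annotated landmarks and pad each set
# with extra points up to 300 in total, using 4 parallel workers
counts = dataset_expand_landmarks(path_annots='/data/annotations',
                                  path_dataset='/data/dataset-expanded',
                                  nb_selected=0.6,
                                  nb_total=300,
                                  nb_workers=4)
print('processed sets: %i' % len(counts))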
Example #2
def collect_triple_dir(paths_landmarks,
                       path_dataset,
                       path_out,
                       coll_dirs=None,
                       scales=None,
                       with_user=False):
    """ collect all subdir up to level of scales with user annotations

    expected annotation structure is <tissue>/<user>_scale-<number>pc/<csv-file>
    expected dataset structure is <tissue>/scale-<number>pc/<image>

    :param [str] paths_landmarks: paths to landmarks / annotations
    :param str path_dataset: path to the dataset with images
    :param str path_out: path for exporting statistics
    :param [{}] coll_dirs: list of already existing collections
    :param [int] scales: list of allowed scales
    :param bool with_user: whether the scale folder name includes the user name
    :return ([{}], list): list of collection dictionaries and an (always empty) list

    >>> coll_dirs, d = collect_triple_dir([update_path('annotations')],
    ...                                   update_path('dataset'), 'output')
    >>> len(coll_dirs) > 0
    True
    >>> 'annotations' in coll_dirs[0]['landmarks'].split(os.sep)
    True
    >>> 'dataset' in coll_dirs[0]['images'].split(os.sep)
    True
    >>> 'output' in coll_dirs[0]['output'].split(os.sep)
    True
    >>> d
    []
    """
    if coll_dirs is None:
        coll_dirs = []
    for path_lnds in paths_landmarks:
        set_name, scale_name = path_lnds.split(os.sep)[-2:]
        scale = parse_path_user_scale(scale_name)[1] \
            if with_user else parse_path_scale(scale_name)
        # if a scale was not recognised in the last folder name
        if np.isnan(scale):
            sub_dirs = list_sub_folders(path_lnds)
            # recurse one level deeper; the second return value is always empty
            coll_dirs, _ = collect_triple_dir(sub_dirs, path_dataset,
                                              path_out, coll_dirs,
                                              scales, with_user)
            continue
        # skip particular scale if it is not among chosen
        if scales is not None and scale not in scales:
            continue
        coll_dirs.append({
            'landmarks': path_lnds,
            'images': os.path.join(path_dataset, set_name,
                                   TEMPLATE_FOLDER_SCALE % scale),
            'output': os.path.join(path_out, set_name, scale_name),
        })
    return coll_dirs, []
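A hedged usage sketch, assuming the annotation tree follows the <tissue>/<user>_scale-<number>pc/<csv-file> layout documented in the docstring above; all paths and scale values below are hypothetical:

# collect only the 10% and 25% scales, expecting user-prefixed folder names
coll_dirs, _ = collect_triple_dir(paths_landmarks=['/data/annotations'],
                                  path_dataset='/data/dataset',
                                  path_out='/data/output',
                                  scales=[10, 25],
                                  with_user=True)
for coll in coll_dirs:
    print(coll['landmarks'], '->', coll['images'], '->', coll['output'])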
Example #3
def load_largest_scale(path_set):
    """ in given set find the largest scale and load all landmarks in full size

    :param str path_set: path to image/landmark set
    :return dict: dictionary of ndarray with loaded landmarks in full scale
    """
    scales_folders = [(parse_path_scale(p), os.path.basename(p))
                      for p in list_sub_folders(path_set)]
    if not scales_folders:
        return None
    scale, folder = sorted(scales_folders, reverse=True)[0]

    paths_csv = glob.glob(os.path.join(path_set, folder, '*.csv'))
    scaling = 100. / scale
    names_lnds = {
        os.path.basename(p): load_landmarks_csv(p) * scaling
        for p in paths_csv
    }
    return names_lnds
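A short sketch, assuming a set folder containing scale sub-folders such as scale-50pc with *.csv landmark files; the path is a hypothetical placeholder:

names_lnds = load_largest_scale('/data/dataset/tissue-sample_1')
if names_lnds is not None:
    for name, lnds in names_lnds.items():
        # each value is an ndarray of coordinates rescaled to the 100% scale
        print(name, lnds.shape)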
Example #4
def dataset_scale_landmarks(path_dataset,
                            scales=DEFAULT_SCALES,
                            nb_workers=NB_WORKERS):
    """ generate several scales within the same dataset

    :param str path_dataset: path to the source/generated dataset
    :param [int] scales: scales to generate
    :param int nb_workers: number of jobs running in parallel
    :return list: results of scaling each set
    """
    list_sets = list_sub_folders(path_dataset)
    logging.info('Found sets: %i', len(list_sets))

    _wrap_scale = partial(scale_set_landmarks, scales=scales)
    counts = list(
        iterate_mproc_map(_wrap_scale,
                          sorted(list_sets),
                          nb_workers=nb_workers,
                          desc='scaling sets'))
    return counts
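A closing sketch, assuming DEFAULT_SCALES and scale_set_landmarks come from the surrounding module; the path and scale values below are hypothetical:

# derive 50% and 25% landmark copies for every set in the generated dataset
counts = dataset_scale_landmarks('/data/dataset-expanded',
                                 scales=[50, 25],
                                 nb_workers=4)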