def filter_paired_landmarks(item, path_dataset, path_reference, col_source, col_target):
    """ filter all relevant landmarks which were used and copy them to experiment

    The case is that in certain challenge stage users had provided just a subset
     of all image landmarks which could be also shuffled. The idea is to identify
     all user used (provided in dataset) landmarks and filter them from
     the temporary reference dataset.

    :param dict|Series item: experiment DataFrame
    :param str path_dataset: path to provided landmarks
    :param str path_reference: path to the complete landmark collection
    :param str col_source: column name of landmarks to be transformed
    :param str col_target: column name of landmarks to be compared
    :return tuple(float,ndarray,ndarray): match ratio, filtered ref and move landmarks
    :raises FileNotFoundError: if the source landmarks are missing in either
        the reference or the provided dataset folder
    :raises ValueError: if more landmarks were matched than exist in reference

    >>> p_data = update_path('data-images')
    >>> p_csv = os.path.join(p_data, 'pairs-imgs-lnds_histol.csv')
    >>> df = pd.read_csv(p_csv)
    >>> ratio, lnds_ref, lnds_move = filter_paired_landmarks(dict(df.iloc[0]), p_data, p_data,
    ...     ImRegBenchmark.COL_POINTS_MOVE, ImRegBenchmark.COL_POINTS_REF)
    >>> ratio
    1.0
    >>> lnds_ref.shape == lnds_move.shape
    True
    """
    path_ref = update_path(item[col_source], pre_path=path_reference)
    if not os.path.isfile(path_ref):
        raise FileNotFoundError('missing landmarks: %s' % path_ref)
    path_load = update_path(item[col_source], pre_path=path_dataset)
    if not os.path.isfile(path_load):
        raise FileNotFoundError('missing landmarks: %s' % path_load)

    # the complete source landmarks are needed for pairing, counting and
    # filtering, so read the file just once and reuse the array
    lnds_ref_source = load_landmarks(path_ref)
    pairs = common_landmarks(lnds_ref_source, load_landmarks(path_load), threshold=1)
    if not pairs.size:
        logging.warning(
            'there is not pairing between landmarks or dataset and user reference'
        )
        return 0., np.empty([0]), np.empty([0])

    # order the pairs by their second column and keep the first column,
    # which is used below as index into the reference landmarks
    pairs = sorted(pairs.tolist(), key=lambda p: p[1])
    ind_ref = np.asarray(pairs)[:, 0]

    path_lnd_ref = update_path(item[col_target], pre_path=path_reference)
    lnds_ref_target = load_landmarks(path_lnd_ref)
    # only indexes valid in both reference sets can be used for filtering
    nb_common = min(len(lnds_ref_target), len(lnds_ref_source))
    ind_ref = ind_ref[ind_ref < nb_common]

    lnds_filter_ref = lnds_ref_target[ind_ref]
    lnds_filter_move = lnds_ref_source[ind_ref]

    # an empty reference set leaves nothing to match, report it as zero ratio
    # instead of failing on division by zero
    ratio_matches = len(ind_ref) / float(nb_common) if nb_common else 0.
    if ratio_matches > 1:
        raise ValueError(
            'suspicious ratio for %i paired and %i common landmarks' % (len(pairs), nb_common))
    return ratio_matches, lnds_filter_ref, lnds_filter_move
def filter_landmarks(idx_row, path_output, path_dataset, path_reference):
    """ filter all relevant landmarks which were used and copy them to experiment

    The moving landmarks provided in the dataset are paired with the complete
    reference moving landmarks and the paired subset of both the reference
    and the moving landmark sets is saved to the output folder.
    If nothing can be paired, empty selections are saved and the ratio is 0.

    :param (idx, {}|Series) idx_row: experiment DataFrame
    :param str path_output: path to output folder
    :param str path_dataset: path to provided landmarks
    :param str path_reference: path to the complete landmark collection
    :return (idx, float): record index and match ratio
    """
    idx, row = idx_row
    path_ref = update_path_(row[COL_POINTS_MOVE], path_reference)
    path_load = update_path_(row[COL_POINTS_MOVE], path_dataset)

    # complete landmark sets are read once and reused for pairing,
    # filtering and computing the ratio
    lnds_complete = {
        COL_POINTS_MOVE: load_landmarks(path_ref),
        COL_POINTS_REF: load_landmarks(update_path_(row[COL_POINTS_REF], path_reference)),
    }
    pairs = common_landmarks(lnds_complete[COL_POINTS_MOVE], load_landmarks(path_load),
                             threshold=1)
    if pairs.size:
        # order the pairs by their second column and keep the first column,
        # which is used below as index into the reference landmarks
        pairs = sorted(pairs.tolist(), key=lambda p: p[1])
        ind_ref = np.asarray(pairs)[:, 0]
    else:
        # slicing a column from an empty pairing would raise IndexError,
        # so select nothing explicitly
        logging.warning('no common landmarks found between "%s" and "%s"',
                        path_ref, path_load)
        ind_ref = np.empty([0], dtype=int)

    # moving and reference landmarks
    for col in [COL_POINTS_REF, COL_POINTS_MOVE]:
        path_out = update_path_(row[col], path_output)
        create_folder(os.path.dirname(path_out), ok_existing=True)
        save_landmarks(path_out, lnds_complete[col][ind_ref])

    # save ratio of found landmarks; empty reference means nothing was found
    len_lnds_ref = len(lnds_complete[COL_POINTS_REF])
    ratio_matches = len(ind_ref) / float(len_lnds_ref) if len_lnds_ref else 0.
    return idx, ratio_matches