def test_count_transitions_segment(self):
    """Exercise the full SLIC -> transitions -> GMM -> graph-cut pipeline on 2D data."""
    grayscale = self.img[:, :, 0]
    annotation = self.annot.astype(int)
    # over-segment the image, then take the majority annotation label per superpixel
    superpixels = segment_slic_img2d(grayscale, sp_size=15, relative_compact=0.2)
    hist_labels = histogram_regions_labels_norm(superpixels, annotation)
    segm_labels = np.argmax(hist_labels, axis=1)
    transitions = count_label_transitions_connected_segments({'a': superpixels},
                                                             {'a': segm_labels})
    # persist the transition table for inspection
    pd.DataFrame(transitions).to_csv(
        os.path.join(PATH_OUTPUT, 'labels_transitions.csv'))
    pairwise_cost = compute_pairwise_cost_from_transitions(transitions, 10.)
    # build noisy per-superpixel features centred on the reference labels
    np.random.seed(0)
    feats = np.tile(segm_labels, (5, 1)).T.astype(float)
    feats += np.random.random(feats.shape) - 0.5
    model = estim_class_model_gmm(feats, 4)
    probs = model.predict_proba(feats)
    segment_graph_cut_general(superpixels, probs, pairwise_cost)
def main_train(params):
    """ the main composed from following steps:
    1) load already computed data (features and labels) or compute them now
    2) visualise labeled superpixels aka annotation
    3) load or train classifier with hyper-parameters search
    4) perform Leave-One-Out and Leave-P-Out experiments on images

    :param {str: ...} params: experiment configuration dictionary
    :return {str: ...}: updated configuration (experiment paths added)
    """
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info('running TRAINING...')
    show_visual = params.get('visual', False)
    # reuse an existing experiment folder when a config file is present (or forced)
    reload_dir_config = (os.path.isfile(params.get('path_config', ''))
                         or FORCE_RELOAD)
    params = tl_expt.create_experiment_folder(
        params, dir_name=NAME_EXPERIMENT,
        stamp_unique=params.get('unique', EACH_UNIQUE_EXPERIMENT),
        skip_load=reload_dir_config)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if show_visual:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)
    df_stat = pd.DataFrame()

    # load cached features/labels when available, otherwise compute and cache
    path_dump = os.path.join(params['path_exp'], NAME_DUMP_TRAIN_DATA)
    if os.path.isfile(path_dump) and not FORCE_RECOMP_DATA:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
            dict_label_hist, feature_names = load_dump_data(path_dump)
    else:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
            dict_label_hist, feature_names = \
            dataset_load_images_annot_compute_features(params, show_visual)
        save_dump_data(path_dump, dict_imgs, dict_annot, dict_slics,
                       dict_features, dict_labels, dict_label_hist,
                       feature_names)
    assert len(dict_imgs) > 1, 'training require at least 2 images'

    # project per-superpixel labels back to pixels, then score against annotation
    dict_annot_slic = {
        n: np.asarray(dict_labels[n])[dict_slics[n]]
        for n in dict_annot
    }
    df = eval_segment_with_annot(params, dict_annot, dict_annot_slic,
                                 dict_label_hist,
                                 NAME_CSV_SEGM_STAT_SLIC_ANNOT,
                                 params.get('drop_labels', None),
                                 params['nb_jobs'])
    df_stat = df_stat.append(get_summary(df, 'SLIC-annot'), ignore_index=True)
    path_csv_stat = os.path.join(params['path_exp'],
                                 NAME_CSV_SEGM_STAT_RESULTS)
    df_stat.set_index(['name']).to_csv(path_csv_stat)

    if params['gc_use_trans']:
        params['label_transitions'] = \
            seg_gc.count_label_transitions_connected_segments(dict_slics,
                                                              dict_labels)
        logging.info('summary on edge-label transitions: \n %s',
                     repr(params['label_transitions']))

    path_purity_visu = os.path.join(params['path_exp'], FOLDER_SLIC_ANNOT) \
        if show_visual else None
    dict_labels = filter_train_with_purity(dict_imgs, dict_labels,
                                           dict_label_hist,
                                           params['label_purity'], dict_slics,
                                           drop_labels=params.get(
                                               'drop_labels', None),
                                           path_visu=path_purity_visu,
                                           nb_jobs=params['nb_jobs'])

    logging.info('prepare features...')
    # concentrate features, labels
    # NOTE: `or []` guards against params['drop_labels'] being explicitly None,
    # which the call sites above already allow — list + None would raise TypeError
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels, balance_type=params['balance'],
        drop_labels=[-1, np.nan] + (params.get('drop_labels') or []))
    # drop "do not care" label which are -1
    features = np.nan_to_num(features)

    nb_holdout = params.get('cross_val', CROSS_VAL_LEAVE_OUT_SEARCH)
    nb_holdout = max(1, int(round(len(sizes) * nb_holdout)))  # minimum is 1
    nb_holdout = min(nb_holdout, int(len(sizes) / 2))  # max is half of the set
    params, classif, path_classif = load_train_classifier(
        params, features, labels, feature_names, sizes, nb_holdout)

    def _path_expt(n):
        # shorthand: path inside the current experiment folder
        return os.path.join(params['path_exp'], n)

    # test classif. on training images
    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
    df_paths.reset_index(inplace=True)
    paths_img = df_paths['path_image'].tolist()
    if RUN_TRAIN_PREDICT:
        perform_train_predictions(params, paths_img, classif,
                                  show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_TRAIN))
    gc.collect()
    time.sleep(1)

    # LEAVE P OUT
    if params.get('run_LPO', True):
        idx_paths_img = list(
            zip(df_paths.index.tolist(), df_paths['path_image'].tolist()))
        df_stat = experiment_lpo(params, df_stat, dict_annot, idx_paths_img,
                                 path_classif, path_dump, nb_holdout,
                                 show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_LPO))
        # write_skip_file(_path_expt(FOLDER_LPO_VISU))

    logging.info('Statistic: \n %s', repr(df_stat.describe()))
    logging.info('training DONE')
    return params
def main_train(params):
    """ the main composed from following steps:
    1) load already computed data (features and labels) or compute them now
    2) visualise labeled superpixels aka annotation
    3) load or train classifier with hyper-parameters search
    4) perform Leave-One-Out and Leave-P-Out experiments on images

    :param {str: ...} params: experiment configuration dictionary
    :return {str: ...}: updated configuration (experiment paths added)
    """
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info('running TRAINING...')
    # reuse an existing experiment folder when a config file is present (or forced)
    reload_dir_config = (os.path.isfile(params['path_config']) or FORCE_RELOAD)
    params = tl_expt.create_experiment_folder(params,
                                              dir_name=NAME_EXPERIMENT,
                                              stamp_unique=EACH_UNIQUE_EXPERIMENT,
                                              skip_load=reload_dir_config)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if params['visual']:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)
    df_stat = pd.DataFrame()

    # load cached features/labels when available, otherwise compute and cache them
    path_dump = os.path.join(params['path_exp'], NAME_DUMP_TRAIN_DATA)
    if os.path.isfile(path_dump) and not FORCE_RECOMP_DATA:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
            dict_label_hist, feature_names = load_dump_data(path_dump)
    else:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
            dict_label_hist, feature_names = \
            dataset_load_images_annot_compute_features(params)
        save_dump_data(path_dump, dict_imgs, dict_annot, dict_slics,
                       dict_features, dict_labels, dict_label_hist,
                       feature_names)
    assert len(dict_imgs) > 1, 'training require at least 2 images'

    # project per-superpixel labels back onto pixels to score against annotation
    dict_annot_slic = {n: np.asarray(dict_labels[n])[dict_slics[n]]
                       for n in dict_annot}
    df = eval_segment_with_annot(params, dict_annot, dict_annot_slic,
                                 dict_label_hist,
                                 NAME_CSV_SEGM_STAT_SLIC_ANNOT,
                                 params['nb_jobs'])
    df_stat = df_stat.append(get_summary(df, 'SLIC-annot'), ignore_index=True)
    path_csv_stat = os.path.join(params['path_exp'], NAME_CSV_SEGM_STAT_RESULTS)
    df_stat.set_index(['name']).to_csv(path_csv_stat)

    if params['gc_use_trans']:
        params['label_transitions'] = \
            seg_gc.count_label_transitions_connected_segments(dict_slics,
                                                              dict_labels)
        logging.info('summary on edge-label transitions: \n %s',
                     repr(params['label_transitions']))

    # mark superpixels whose annotation purity is below threshold as -1
    # ("do not care") so they are excluded from training below
    for name in dict_labels:
        weights = np.max(dict_label_hist[name], axis=1)
        dict_labels[name][weights < params['label_purity']] = -1

    logging.info('prepare features...')
    # concentrate features, labels
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels, balance_type=params['balance'],
        drop_labels=[-1])
    # drop "do not care" label which are -1
    features = np.nan_to_num(features)

    # hold out a fraction of images for hyper-parameter search; at least 1
    nb_holdout = max(1, int(round(len(sizes) * CROSS_VAL_LEAVE_OUT_SEARCH)))
    params, classif, path_classif = load_train_classifier(params, features,
                                                          labels,
                                                          feature_names, sizes,
                                                          nb_holdout)

    # test classif on images
    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
    paths_img = df_paths['path_image'].tolist()
    perform_predictions(params, paths_img, classif)

    # LEAVE ONE OUT
    if RUN_CROSS_VAL_LOO:
        df_stat = experiment_loo(params, df_stat, dict_annot, paths_img,
                                 path_classif, path_dump)
    # LEAVE P OUT
    if RUN_CROSS_VAL_LPO:
        df_stat = experiment_lpo(params, df_stat, dict_annot, paths_img,
                                 path_classif, path_dump, nb_holdout)
    logging.info('training DONE')
    return params