Exemplo n.º 1
0
    def test_count_transitions_segment(self):
        """ Smoke test: label transitions -> pairwise cost -> graph-cut.

        Superpixels are computed on the first image channel, each superpixel
        gets the label with the largest value in its normalised annotation
        histogram, the label transitions between connected superpixels are
        exported to CSV and turned into a pairwise cost, and finally a GMM
        fitted on noisy synthetic features feeds the graph-cut segmentation.
        The test only checks that the whole chain runs; nothing is asserted.
        """
        channel = self.img[:, :, 0]
        annot_int = self.annot.astype(int)

        superpixels = segment_slic_img2d(channel, sp_size=15,
                                         relative_compact=0.2)
        hist_norm = histogram_regions_labels_norm(superpixels, annot_int)
        sp_labels = np.argmax(hist_norm, axis=1)
        transitions = count_label_transitions_connected_segments(
            {'a': superpixels}, {'a': sp_labels})
        pd.DataFrame(transitions).to_csv(
            os.path.join(PATH_OUTPUT, 'labels_transitions.csv'))
        pairwise_cost = compute_pairwise_cost_from_transitions(transitions,
                                                               10.)

        # synthetic features: the label repeated in 5 columns plus uniform
        # noise from [-0.5, 0.5); the seed is fixed for reproducibility
        np.random.seed(0)
        feats = np.tile(sp_labels, (5, 1)).T.astype(float)
        noise = np.random.random(feats.shape) - 0.5
        feats += noise

        model = estim_class_model_gmm(feats, 4)
        probabilities = model.predict_proba(feats)

        segment_graph_cut_general(superpixels, probabilities, pairwise_cost)
Exemplo n.º 2
0
def main_train(params):
    """ the main training pipeline composed from following steps:
    1) load already computed data (features and labels) or compute them now
    2) evaluate (and optionally visualise) labeled superpixels aka annotation
    3) load or train classifier with hyper-parameters search
    4) predict on the training images and perform the Leave-P-Out experiment

    Required keys of `params` read here: 'nb_jobs', 'gc_use_trans',
    'label_purity', 'balance', 'path_train_list'; 'path_exp' has to be
    available after `tl_expt.create_experiment_folder`.
    Optional keys: 'visual', 'path_config', 'unique', 'drop_labels',
    'cross_val', 'run_LPO'.
    When 'gc_use_trans' is truthy the key 'label_transitions' is written.

    :param {str: ...} params: experiment configuration
    :return {str: ...}: the (updated) experiment configuration
    """
    # NOTE: this changes the level of the root logger for the whole process
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info('running TRAINING...')
    show_visual = params.get('visual', False)

    reload_dir_config = (os.path.isfile(params.get('path_config', ''))
                         or FORCE_RELOAD)
    params = tl_expt.create_experiment_folder(params,
                                              dir_name=NAME_EXPERIMENT,
                                              stamp_unique=params.get(
                                                  'unique',
                                                  EACH_UNIQUE_EXPERIMENT),
                                              skip_load=reload_dir_config)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if show_visual:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)
    df_stat = pd.DataFrame()

    # reuse the dumped training data if they exist, otherwise compute them
    # now and dump them for the next run
    path_dump = os.path.join(params['path_exp'], NAME_DUMP_TRAIN_DATA)
    if os.path.isfile(path_dump) and not FORCE_RECOMP_DATA:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
        dict_label_hist, feature_names = load_dump_data(path_dump)
    else:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
        dict_label_hist, feature_names = \
            dataset_load_images_annot_compute_features(params,
                                                       show_visual)
        save_dump_data(path_dump, dict_imgs, dict_annot, dict_slics,
                       dict_features, dict_labels, dict_label_hist,
                       feature_names)
    assert len(dict_imgs) > 1, 'training require at least 2 images'

    # project the superpixel labels back to the image grid by indexing the
    # label vector with the superpixel map
    dict_annot_slic = {
        n: np.asarray(dict_labels[n])[dict_slics[n]]
        for n in dict_annot
    }
    df = eval_segment_with_annot(params, dict_annot, dict_annot_slic,
                                 dict_label_hist,
                                 NAME_CSV_SEGM_STAT_SLIC_ANNOT,
                                 params.get('drop_labels',
                                            None), params['nb_jobs'])
    # `DataFrame.append` was removed in pandas 2.0, so add the summary row
    # with `pd.concat`; `get_summary` is expected to return one dict-like
    # record (dict or Series) -- TODO confirm
    summary = get_summary(df, 'SLIC-annot')
    df_stat = pd.concat([df_stat, pd.DataFrame([summary])],
                        ignore_index=True)
    path_csv_stat = os.path.join(params['path_exp'],
                                 NAME_CSV_SEGM_STAT_RESULTS)
    df_stat.set_index(['name']).to_csv(path_csv_stat)

    if params['gc_use_trans']:
        # the transitions are counted on the labels before purity filtering
        params['label_transitions'] = \
            seg_gc.count_label_transitions_connected_segments(dict_slics,
                                                              dict_labels)
        logging.info('summary on edge-label transitions: \n %s',
                     repr(params['label_transitions']))

    path_purity_visu = os.path.join(params['path_exp'], FOLDER_SLIC_ANNOT) \
        if show_visual else None
    dict_labels = filter_train_with_purity(dict_imgs,
                                           dict_labels,
                                           dict_label_hist,
                                           params['label_purity'],
                                           dict_slics,
                                           drop_labels=params.get(
                                               'drop_labels', None),
                                           path_visu=path_purity_visu,
                                           nb_jobs=params['nb_jobs'])

    logging.info('prepare features...')
    # 'drop_labels' may be missing or explicitly None (the default used by
    # the calls above); both mean "no extra labels", so avoid `list + None`
    drop_labels = [-1, np.nan] + list(params.get('drop_labels') or [])
    # compose the dataset from features and labels of all images while
    # dropping the "do not care" label (-1), NaN and the user-defined labels
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features,
        dict_labels,
        balance_type=params['balance'],
        drop_labels=drop_labels)
    # replace NaN / infinite feature values by finite numbers
    features = np.nan_to_num(features)

    # number of hold-out samples: a fraction of the set clipped to the range
    # from 1 up to half of the set
    nb_holdout = params.get('cross_val', CROSS_VAL_LEAVE_OUT_SEARCH)
    nb_holdout = max(1, int(round(len(sizes) * nb_holdout)))  # minimum is 1
    nb_holdout = min(nb_holdout, int(len(sizes) / 2))  # max is half of the set
    params, classif, path_classif = load_train_classifier(
        params, features, labels, feature_names, sizes, nb_holdout)

    def _path_expt(n):
        """ compose a path inside the experiment folder """
        return os.path.join(params['path_exp'], n)

    # test classif. on  training images
    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
    df_paths.reset_index(inplace=True)
    paths_img = df_paths['path_image'].tolist()
    if RUN_TRAIN_PREDICT:
        perform_train_predictions(params,
                                  paths_img,
                                  classif,
                                  show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_TRAIN))

    gc.collect()
    time.sleep(1)

    # LEAVE P OUT
    if params.get('run_LPO', True):
        idx_paths_img = list(
            zip(df_paths.index.tolist(), df_paths['path_image'].tolist()))
        df_stat = experiment_lpo(params,
                                 df_stat,
                                 dict_annot,
                                 idx_paths_img,
                                 path_classif,
                                 path_dump,
                                 nb_holdout,
                                 show_debug_imgs=show_visual)
    else:
        write_skip_file(_path_expt(FOLDER_LPO))
        # write_skip_file(_path_expt(FOLDER_LPO_VISU))

    logging.info('Statistic: \n %s', repr(df_stat.describe()))
    logging.info('training DONE')
    return params
def main_train(params):
    """ the main training pipeline composed from following steps:
    1) load already computed data (features and labels) or compute them now
    2) evaluate labeled superpixels aka annotation
    3) load or train classifier with hyper-parameters search
    4) perform Leave-One-Out and Leave-P-Out experiments on images

    Required keys of `params` read here: 'path_config', 'visual', 'nb_jobs',
    'gc_use_trans', 'label_purity', 'balance', 'path_train_list'; 'path_exp'
    has to be available after `tl_expt.create_experiment_folder`.
    When 'gc_use_trans' is truthy the key 'label_transitions' is written.

    :param {str: ...} params: experiment configuration
    :return {str: ...}: the (updated) experiment configuration
    """
    # NOTE: this changes the level of the root logger for the whole process
    logging.getLogger().setLevel(logging.DEBUG)
    logging.info('running TRAINING...')

    reload_dir_config = (os.path.isfile(params['path_config']) or FORCE_RELOAD)
    params = tl_expt.create_experiment_folder(params, dir_name=NAME_EXPERIMENT,
                                              stamp_unique=EACH_UNIQUE_EXPERIMENT,
                                              skip_load=reload_dir_config)
    tl_expt.set_experiment_logger(params['path_exp'])
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))
    tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_BASE)
    if params['visual']:
        tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG)
    df_stat = pd.DataFrame()

    # reuse the dumped training data if they exist, otherwise compute them
    # now and dump them for the next run
    path_dump = os.path.join(params['path_exp'], NAME_DUMP_TRAIN_DATA)
    if os.path.isfile(path_dump) and not FORCE_RECOMP_DATA:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
        dict_label_hist, feature_names = load_dump_data(path_dump)
    else:
        dict_imgs, dict_annot, dict_slics, dict_features, dict_labels, \
        dict_label_hist, feature_names = \
            dataset_load_images_annot_compute_features(params)
        save_dump_data(path_dump, dict_imgs, dict_annot, dict_slics,
                       dict_features, dict_labels, dict_label_hist,
                       feature_names)
    assert len(dict_imgs) > 1, 'training require at least 2 images'

    # project the superpixel labels back to the image grid by indexing the
    # label vector with the superpixel map
    dict_annot_slic = {n: np.asarray(dict_labels[n])[dict_slics[n]]
                       for n in dict_annot}
    df = eval_segment_with_annot(params, dict_annot, dict_annot_slic,
                                 dict_label_hist, NAME_CSV_SEGM_STAT_SLIC_ANNOT,
                                 params['nb_jobs'])
    # `DataFrame.append` was removed in pandas 2.0, so add the summary row
    # with `pd.concat`; `get_summary` is expected to return one dict-like
    # record (dict or Series) -- TODO confirm
    summary = get_summary(df, 'SLIC-annot')
    df_stat = pd.concat([df_stat, pd.DataFrame([summary])],
                        ignore_index=True)
    path_csv_stat = os.path.join(params['path_exp'], NAME_CSV_SEGM_STAT_RESULTS)
    df_stat.set_index(['name']).to_csv(path_csv_stat)

    if params['gc_use_trans']:
        # the transitions are counted on the labels before purity filtering
        params['label_transitions'] = \
            seg_gc.count_label_transitions_connected_segments(dict_slics,
                                                              dict_labels)
        logging.info('summary on edge-label transitions: \n %s',
                     repr(params['label_transitions']))

    # mark superpixels whose dominant label covers less than 'label_purity'
    # of the histogram as "do not care" (-1)
    for name in dict_labels:
        weights = np.max(dict_label_hist[name], axis=1)
        # boolean-mask assignment needs an ndarray; `np.asarray` returns the
        # very same object for arrays, so those are still updated in place
        labels_img = np.asarray(dict_labels[name])
        labels_img[weights < params['label_purity']] = -1
        dict_labels[name] = labels_img

    logging.info('prepare features...')
    # compose the dataset from features and labels of all images while
    # dropping the "do not care" label which is -1
    features, labels, sizes = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels, balance_type=params['balance'], drop_labels=[-1])
    # replace NaN / infinite feature values by finite numbers
    features = np.nan_to_num(features)

    # number of hold-out samples: a fraction of the set, at least 1
    nb_holdout = max(1, int(round(len(sizes) * CROSS_VAL_LEAVE_OUT_SEARCH)))
    params, classif, path_classif = load_train_classifier(params, features,
                                                          labels,  feature_names,
                                                          sizes, nb_holdout)

    # test classif on images
    df_paths = pd.read_csv(params['path_train_list'], index_col=0)
    paths_img = df_paths['path_image'].tolist()
    perform_predictions(params, paths_img, classif)

    # LEAVE ONE OUT
    if RUN_CROSS_VAL_LOO:
        df_stat = experiment_loo(params, df_stat, dict_annot, paths_img,
                                 path_classif, path_dump)

    # LEAVE P OUT
    if RUN_CROSS_VAL_LPO:
        df_stat = experiment_lpo(params, df_stat, dict_annot, paths_img,
                                 path_classif, path_dump, nb_holdout)

    logging.info('training DONE')
    return params