def load_train_classifier(params, features, labels, feature_names, sizes,
                          nb_holdout):
    """ Train a classifier or reload a previously exported one, then evaluate
    it with cross-validation.

    :param dict params: experiment parameters; reads 'path_exp', 'classif',
        'nb_classif_search', 'nb_jobs', 'pca_coef'
    :param ndarray features: feature matrix (samples x features)
    :param ndarray labels: class label per sample
    :param list(str) feature_names: names of the feature columns
    :param sizes: per-set sample counts used by the leave-P-sets-out CV
    :param int nb_holdout: number of sets held out in each CV fold
    :return (dict, obj, str): (updated params, classifier pipeline,
        path to the exported classifier)
    """
    logging.info('train classifier...')
    seg_clf.feature_scoring_selection(features, labels, feature_names,
                                      path_out=params['path_exp'])
    # feature norm & train classification
    fname_classif = seg_clf.TEMPLATE_NAME_CLF.format(params['classif'])
    path_classif = os.path.join(params['path_exp'], fname_classif)
    if os.path.isfile(path_classif) and not FORCE_RETRAIN_CLASSIF:
        logging.info('loading classifier: %s', path_classif)
        params_local = params.copy()
        dict_classif = seg_clf.load_classifier(path_classif)
        classif = dict_classif['clf_pipeline']
        params = dict_classif['params']
        # keep the current run's path_* / gc_* settings instead of the
        # persisted ones, so the loaded classifier works in this experiment
        params.update({k: params_local[k] for k in params_local
                       if k.startswith('path_') or k.startswith('gc_')})
        logging.debug('loaded PARAMETERS: %s', repr(params))
    else:
        # CV split is only needed for the training branch
        cv = seg_clf.CrossValidatePSetsOut(sizes, nb_hold_out=nb_holdout)
        classif, path_classif = seg_clf.create_classif_train_export(
            params['classif'], features, labels, cross_val=cv, params=params,
            feature_names=feature_names,
            nb_search_iter=params['nb_classif_search'],
            nb_jobs=params['nb_jobs'], pca_coef=params['pca_coef'],
            path_out=params['path_exp'])
    params['path_classif'] = path_classif
    # fresh CV split for evaluation (any earlier split may be exhausted)
    cv = seg_clf.CrossValidatePSetsOut(sizes, nb_hold_out=nb_holdout)
    seg_clf.eval_classif_cross_val_scores(
        params['classif'], classif, features, labels, cross_val=cv,
        path_out=params['path_exp'])
    seg_clf.eval_classif_cross_val_roc(
        params['classif'], classif, features, labels, cross_val=cv,
        path_out=params['path_exp'])
    return params, classif, path_classif
def retrain_loo_segment_image(imgs_idx_path, path_classif, path_dump,
                              path_out, path_visu):
    """ load the classifier, and dumped data, subtract the image,
    retrain the classif. without it and do the segmentation

    :param str imgs_idx_path: index~path to the input image
    :param str path_classif: path to saved classifier
    :param str path_dump: path to dumped data
    :param str path_out: path to segmentation outputs
    :param str path_visu: path to visualisation outputs
    :return (str, ndarray, ndarray): image name, segmentation, GC segmentation
    """
    idx, path_img = parse_imgs_idx_path(imgs_idx_path)
    dict_imgs, _, _, dict_features, dict_labels, _, _ = \
        load_dump_data(path_dump)
    dict_classif = seg_clf.load_classifier(path_classif)
    classif = dict_classif['clf_pipeline']
    params = dict_classif['params']

    # drop the left-out image from the training data
    idx_name = get_idx_name(idx, path_img)
    for d in [dict_features, dict_labels]:
        _ = d.pop(idx_name, None)
    # raise (not assert) so the check survives `python -O`,
    # matching the style of retrain_lpo_segment_image
    if (len(dict_imgs) - len(dict_features)) != 1:
        raise ValueError('no image was dropped from training set')

    features, labels, _ = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels, balance_type=params['balance'],
        drop_labels=[-1])
    # retrain on the reduced set, then segment the held-out image
    classif.fit(features, labels)
    idx_name, segm, segm_gc = segment_image(imgs_idx_path, params, classif,
                                            path_out, path_visu)
    # gc.collect(), time.sleep(1)
    return idx_name, segm, segm_gc
def main_predict(path_classif, path_pattern_imgs, path_out, name='SEGMENT___',
                 params_local=None):
    """ given trained classifier segment new images

    :param str path_classif: path to the saved classifier
    :param str path_pattern_imgs: glob pattern of input images
    :param str path_out: output directory
    :param str name: name of the output sub-folder
    :param dict params_local: optional overrides for path_* / gc_* parameters
    """
    logging.getLogger().setLevel(logging.INFO)
    logging.info('running PREDICTION...')
    assert path_pattern_imgs is not None

    # restore the classifier together with its training parameters
    dict_classif = seg_clf.load_classifier(path_classif)
    classif = dict_classif['clf_pipeline']
    params = dict_classif['params']
    if params_local is not None:
        overrides = {k: params_local[k] for k in params_local
                     if k.startswith('path_') or k.startswith('gc_')}
        params.update(overrides)

    visual = params.get('visual', False)
    path_out, path_visu = prepare_output_dir(path_pattern_imgs, path_out,
                                             name, visual=visual)
    tl_expt.set_experiment_logger(path_out)
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))

    paths_img = sorted(glob.glob(path_pattern_imgs))
    logging.info('found %i images on path "%s"', len(paths_img),
                 path_pattern_imgs)

    logging.debug('run prediction...')
    _wrapper_segment = partial(
        try_segment_image,
        params=params,
        classif=classif,
        path_out=path_out,
        path_visu=path_visu,
        show_debug_imgs=params.get('visual', False),
    )
    img_idx_paths = [(None, p) for p in paths_img]
    iterate = tl_expt.WrapExecuteSequence(
        _wrapper_segment,
        img_idx_paths,
        nb_workers=params['nb_workers'],
        desc='segmenting images',
    )
    for _ in iterate:
        gc.collect()
        time.sleep(1)

    logging.info('prediction DONE')
def retrain_lpo_segment_image(list_imgs_idx_path, path_classif, path_dump,
                              path_out, path_visu,
                              show_debug_imgs=SHOW_DEBUG_IMAGES):
    """ load the classifier, and dumped data, subtract the image,
    retrain the classif without it and do the segmentation

    :param list(str) list_imgs_idx_path: path to input image
    :param str path_classif: path to saved classifier
    :param str path_dump: path to dumped data
    :param, str path_out: path to segmentation outputs
    :param bool show_debug_imgs: whether show debug images
    :return (str, ndarray, ndarray):
    """
    dict_imgs, _, _, dict_features, dict_labels, _, _ = load_dump_data(
        path_dump)
    dict_classif = seg_clf.load_classifier(path_classif)
    classif = dict_classif['clf_pipeline']
    params = dict_classif['params']

    # remove every held-out image from the training dictionaries
    for img_idx, img_path in list_imgs_idx_path:
        name = get_idx_name(img_idx, img_path)
        dict_features.pop(name, None)
        dict_labels.pop(name, None)
    nb_dropped = len(dict_imgs) - len(dict_features)
    if nb_dropped != len(list_imgs_idx_path):
        raise ValueError(
            'subset of %i images was not dropped, training set %i from total %i'
            % (len(list_imgs_idx_path), len(dict_features), len(dict_imgs)))

    features, labels, _ = seg_clf.convert_set_features_labels_2_dataset(
        dict_features, dict_labels, balance_type=params['balance'],
        drop_labels=[-1, np.nan] + params.get('drop_labels', []))
    # retrain without the held-out subset
    classif.fit(features, labels)

    # segment each held-out image with the retrained classifier
    dict_segm, dict_segm_gc = {}, {}
    for imgs_idx_path in list_imgs_idx_path:
        idx_name, segm, segm_gc = segment_image(
            imgs_idx_path, params, classif, path_out, path_visu,
            show_debug_imgs=show_debug_imgs)
        dict_segm[idx_name] = segm
        dict_segm_gc[idx_name] = segm_gc
        gc.collect()
        time.sleep(1)
    return dict_segm, dict_segm_gc
def main(params):
    """ PIPELINE for new detections

    :param dict params: experiment parameters (paths, classifier, workers)
    """
    logging.info('running...')

    params = run_train.prepare_experiment_folder(params, FOLDER_EXPERIMENT)

    # run_train.check_pathes_patterns(paths)
    tl_expt.set_experiment_logger(params['path_expt'])
    logging.info('COMPUTER: \n%s', repr(os.uname()))
    logging.info(tl_expt.string_dict(params, desc='PARAMETERS'))

    tl_expt.create_subfolders(params['path_expt'], LIST_SUBFOLDER)

    path_csv = os.path.join(params['path_expt'], NAME_CSV_TRIPLES)
    df_paths = get_csv_triplets(params['path_list'], path_csv,
                                params['path_images'], params['path_segms'],
                                force_reload=FORCE_RERUN)

    dict_classif = seg_clf.load_classifier(params['path_classif'])
    params_clf = dict_classif['params']
    params_clf.update(params)
    # log the merged dict, not the un-updated `params`
    logging.info(tl_expt.string_dict(params_clf, desc='UPDATED PARAMETERS'))

    # perform on new images
    rows_stat = []  # accumulate rows; DataFrame.append was removed in pandas 2
    wrapper_detection = partial(load_compute_detect_centers, params=params_clf,
                                path_classif=params['path_classif'],
                                path_output=params['path_expt'])
    iterate = tl_expt.WrapExecuteSequence(wrapper_detection,
                                          df_paths.iterrows(),
                                          nb_jobs=params['nb_jobs'])
    for dict_center in iterate:
        rows_stat.append(dict_center)
        # dump intermediate results so a crash does not lose progress
        df_stat = pd.DataFrame(rows_stat)
        df_stat.to_csv(os.path.join(params['path_expt'],
                                    NAME_CSV_TRIPLES_TEMP))

    df_stat = pd.DataFrame(rows_stat)
    df_stat.set_index(['image'], inplace=True)
    df_stat.to_csv(os.path.join(params['path_expt'], NAME_CSV_TRIPLES))
    logging.info('STATISTIC: \n %s', repr(df_stat.describe()))

    logging.info('DONE')
def load_compute_detect_centers(idx_row, params, classif=None, path_classif='',
                                path_output=''):
    """ complete pipeline fon input image and seg_pipe, such that load them,
    generate points, compute features and using given classifier predict labels

    :param (int, DF:row) idx_row:
    :param dict params:
    :param obj classif:
    :param str path_classif:
    :param str path_output:
    :return {str: float}:
    """
    _, row = idx_row
    dict_center = dict(row)
    # identity check: `not classif` would invoke estimator truthiness
    # (sklearn Pipeline defines __len__), which is not the intended test
    if classif is None:
        dict_classif = seg_clf.load_classifier(path_classif)
        classif = dict_classif['clf_pipeline']
    try:
        path_show_in = os.path.join(path_output, FOLDER_INPUTS)
        name, img, segm, _ = run_train.load_image_segm_center(
            (None, row), path_show_in, params['dict_relabel'])
        t_start = time.time()
        _, slic, points, features, feature_names = \
            run_train.estim_points_compute_features(name, img, segm, params)
        dict_detect = run_train.detect_center_candidates(
            name, img, segm, None, slic, points, features, feature_names,
            params, path_output, classif)
        dict_detect['time elapsed'] = time.time() - t_start
        dict_center.update(dict_detect)

        dict_center = run_clust.cluster_points_draw_export(dict_center, params,
                                                           path_output)
    except Exception:
        # top-level per-image boundary: log the full traceback and carry on
        logging.exception('load_compute_detect_centers')
    gc.collect()
    time.sleep(1)
    return dict_center