def algo_diagnostics(dataset_config, yatsm_config, X, y, row, col, algo,
                     n_fold, make_plots=True):
    """ Log diagnostics for a trained classifier and, optionally, plot them

    Reports the out-of-bag score (when `algo` exposes `oob_score_`), then
    scores `algo` under three crossvalidation schemes -- plain KFold,
    stratified KFold and shuffled spatial KFold -- and finally reports
    feature importances (when `algo` exposes `feature_importances_`).

    Args:
        dataset_config (dict): dataset configuration options; only forwarded
            to `plots.plot_feature_importance`
        yatsm_config (dict): YATSM algorithm options; only forwarded to
            `plots.plot_feature_importance`
        X (np.ndarray): feature matrix handed to `diagnostics.kfold_scores`
        y (np.ndarray): labels matching `X`
        row (np.ndarray): row pixel locations of `y`
        col (np.ndarray): column pixel locations of `y`
        algo (sklearn classifier): scikit-learn classifier to evaluate
        n_fold (int): number of folds used by every crossvalidation scheme
        make_plots (bool, optional): show diagnostic plots (default: True)

    """
    divider = '<----------------------->'

    # Diagnostics that do not require crossvalidation
    logger.info('<----- DIAGNOSTICS ----->')
    if hasattr(algo, 'oob_score_'):
        logger.info('Out of Bag score: {p}'.format(p=algo.oob_score_))

    # (log header, iterator factory) pairs. The factories are lambdas so each
    # crossvalidation iterator is only built after its header has been logged.
    schemes = (
        ('KFold crossvalidation scores:',
         lambda: KFold(y.size, n_folds=n_fold)),
        ('Stratified KFold crossvalidation scores:',
         lambda: StratifiedKFold(y, n_folds=n_fold)),
        ('Spatialized shuffled KFold crossvalidation scores:',
         lambda: diagnostics.SpatialKFold(y, row, col,
                                          n_folds=n_fold, shuffle=True)),
    )

    # Start from an empty two-column array; every scheme stacks its scores
    # underneath, so each result must be two columns wide.
    kfold_summary = np.zeros((0, 2))
    for header, build_cv in schemes:
        logger.info(divider)
        logger.info(header)
        cv_iter = build_cv()
        fold_scores = diagnostics.kfold_scores(X, y, algo, cv_iter)
        kfold_summary = np.vstack((kfold_summary, fold_scores))

    if make_plots:
        # Labels follow the order in which the schemes were scored above
        test_names = ['KFold', 'Stratified KFold', 'Spatial KFold (shuffle)']
        plots.plot_crossvalidation_scores(kfold_summary, test_names)

    logger.info(divider)
    if not hasattr(algo, 'feature_importances_'):
        return

    logger.info('Feature importance:')
    logger.info(algo.feature_importances_)
    if make_plots:
        plots.plot_feature_importance(algo, dataset_config, yatsm_config)
def algo_diagnostics(X, y, row, col, algo):
    """ Log diagnostics for a trained classifier and, optionally, plot them

    Reports the out-of-bag score (when `algo` exposes `oob_score_`), then
    scores `algo` under plain KFold, stratified KFold and shuffled spatial
    KFold crossvalidation, and finally reports feature importances (when
    `algo` exposes `feature_importances_`).

    Note:
        `n_fold`, `make_plots`, `dataset_config` and `yatsm_config` are read
        by this function but are neither parameters nor locals; they must be
        available from an enclosing or module-level scope when it is called.

    Args:
        X (np.ndarray): feature matrix handed to `diagnostics.kfold_scores`
        y (np.ndarray): labels matching `X`
        row (np.ndarray): row pixel locations of `y`
        col (np.ndarray): column pixel locations of `y`
        algo (sklearn classifier): scikit-learn classifier to evaluate

    """
    divider = '<----------------------->'

    # Diagnostics that do not require crossvalidation
    logger.info('<----- DIAGNOSTICS ----->')
    if hasattr(algo, 'oob_score_'):
        logger.info('Out of Bag score: {p}'.format(p=algo.oob_score_))

    # (log header, iterator factory) pairs. The factories are lambdas so each
    # crossvalidation iterator is only built after its header has been logged.
    schemes = (
        ('KFold crossvalidation scores:',
         lambda: KFold(y.size, n_folds=n_fold)),
        ('Stratified KFold crossvalidation scores:',
         lambda: StratifiedKFold(y, n_folds=n_fold)),
        ('Spatialized shuffled KFold crossvalidation scores:',
         lambda: diagnostics.SpatialKFold(y, row, col,
                                          n_folds=n_fold, shuffle=True)),
    )

    # Start from an empty two-column array; every scheme stacks its scores
    # underneath, so each result must be two columns wide.
    kfold_summary = np.zeros((0, 2))
    for header, build_cv in schemes:
        logger.info(divider)
        logger.info(header)
        cv_iter = build_cv()
        fold_scores = diagnostics.kfold_scores(X, y, algo, cv_iter)
        kfold_summary = np.vstack((kfold_summary, fold_scores))

    if make_plots:
        # Labels follow the order in which the schemes were scored above
        test_names = ['KFold', 'Stratified KFold', 'Spatial KFold (shuffle)']
        plots.plot_crossvalidation_scores(kfold_summary, test_names)

    logger.info(divider)
    if not hasattr(algo, 'feature_importances_'):
        return

    logger.info('Feature importance:')
    logger.info(algo.feature_importances_)
    if make_plots:
        plots.plot_feature_importance(algo, dataset_config, yatsm_config)
def report(kf):
    """ Log and return crossvalidation scores for one crossvalidation iterator

    `X`, `y` and `algo` are not parameters; they are looked up in the
    enclosing (or module-level) scope when the function runs.

    Args:
        kf: crossvalidation iterator forwarded to `diagnostics.kfold_scores`;
            its class name is used to label the log output

    Returns:
        Whatever `diagnostics.kfold_scores` returns, or None when scoring
        raised an exception (a warning is logged instead of propagating).

    """
    cv_name = kf.__class__.__name__

    logger.info("<----------------------->")
    logger.info("%s crossvalidation scores:" % cv_name)
    try:
        scores = diagnostics.kfold_scores(X, y, algo, kf)
    except Exception as e:
        # Deliberately broad: one failing crossvalidation scheme should be
        # reported and skipped rather than abort the remaining diagnostics.
        # Format the exception itself instead of `e.message`: that attribute
        # was deprecated in Python 2.6 and does not exist in Python 3, where
        # reading it would raise AttributeError from inside this handler.
        logger.warning("Could not perform %s cross-validation: %s"
                       % (cv_name, e))
        return None
    return scores