from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import MarginErrFunc
from nonconformist.nc import ClassifierNc, RegressorNc, RegressorNormalizer
from nonconformist.nc import AbsErrorErrFunc, SignErrorErrFunc
from nonconformist.evaluation import cross_val_score
from nonconformist.evaluation import ClassIcpCvHelper, RegIcpCvHelper
from nonconformist.evaluation import class_avg_c, class_mean_errors
from nonconformist.evaluation import reg_mean_errors, reg_median_size

# -----------------------------------------------------------------------------
# Classification
# -----------------------------------------------------------------------------
data = load_iris()

icp = IcpClassifier(
    ClassifierNc(ClassifierAdapter(RandomForestClassifier(n_estimators=100)),
                 MarginErrFunc()))
icp_cv = ClassIcpCvHelper(icp)

scores = cross_val_score(icp_cv,
                         data.data,
                         data.target,
                         iterations=5,
                         folds=5,
                         scoring_funcs=[class_mean_errors, class_avg_c],
                         significance_levels=[0.05, 0.1, 0.2])

print('Classification: iris')
scores = scores.drop(['fold', 'iter'], axis=1)
print(scores.groupby(['significance']).mean())

# -----------------------------------------------------------------------------
def test_acp_classification_tree(self):
    # -------------------------------------------------------------------------
    # Experiment setup
    # -------------------------------------------------------------------------
    data = load_iris()

    idx = np.random.permutation(data.target.size)
    train = idx[:int(2 * idx.size / 3)]
    test = idx[int(2 * idx.size / 3):]
    truth = data.target[test].reshape(-1, 1)
    columns = ["C-{}".format(i) for i in np.unique(data.target)] + ["truth"]
    significance = 0.1

    # -------------------------------------------------------------------------
    # Define models
    # -------------------------------------------------------------------------
    models = {
        "ACP-RandomSubSampler": AggregatedCp(
            IcpClassifier(
                ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))),
            RandomSubSampler(),
        ),
        "ACP-CrossSampler": AggregatedCp(
            IcpClassifier(
                ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))),
            CrossSampler(),
        ),
        "ACP-BootstrapSampler": AggregatedCp(
            IcpClassifier(
                ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))),
            BootstrapSampler(),
        ),
        "CCP": CrossConformalClassifier(
            IcpClassifier(
                ClassifierNc(ClassifierAdapter(DecisionTreeClassifier())))),
        "BCP": BootstrapConformalClassifier(
            IcpClassifier(
                ClassifierNc(ClassifierAdapter(DecisionTreeClassifier())))),
    }

    # -------------------------------------------------------------------------
    # Train, predict and evaluate
    # -------------------------------------------------------------------------
    for name, model in models.items():
        model.fit(data.data[train, :], data.target[train])
        prediction = model.predict(data.data[test, :],
                                   significance=significance)
        table = np.hstack((prediction, truth))
        df = pd.DataFrame(table, columns=columns)
        print("\n{}".format(name))
        print("Error rate: {}".format(
            class_mean_errors(prediction, truth, significance)))
        print(df)

    self.assertTrue(True)
# Calibrate the ICP using the calibration set
icp.calibrate(x_val_np, y_val_np)

print('Predicting with the inductive conformal predictor')

# Produce predictions for the test set, with 95% confidence
prediction = icp.predict(x_test_np, significance=0.05)

prediction_conf_cred = pd.DataFrame(
    icp.predict_conf(x_test_np),
    columns=['Label', 'Confidence', 'Credibility'])

# %%
# Cross-validation of the conformal predictor
# icp = IcpClassifier(ClassifierNc(ClassifierAdapter(model), MarginErrFunc()))
icp = OobCpClassifier(
    ClassifierNc(
        OobClassifierAdapter(
            RandomForestClassifier(n_estimators=300, oob_score=True))))

# Grid of significance levels to evaluate over.
significance = np.arange(0, 1, 0.025)
significance[0] = 0.01

icp_cv = ClassIcpCvHelper(icp)
scores = cross_val_score(icp_cv,
                         x_train_np,
                         y_train_np,
                         iterations=1,
                         folds=5,
                         scoring_funcs=[
                             class_mean_errors, class_one_err, class_avg_c,
                             class_one_c, class_empty, class_two_c
                         ],
                         significance_levels=significance)
X, H = sample_hmm(N, L, H_n, start_prob, trans_prob, emi_means, emi_vars)

# Training and test sets. The test set is composed
# only of the last sampled sequence.
train = range(N - 1)
Xtrain = X[train]
Htrain = H[train]
Xtest = X[N - 1]
Htest = H[N - 1]

n, l, _ = X.shape
X = X.flatten()
H = H.flatten()
lengths = [l] * n

# NCM
knn = KNeighborsClassifier(n_neighbors=1)
ncm = ClassifierNc(ClassifierAdapter(knn))

cphmm = CPHMM(ncm, n_states=H_n, smooth=False)

# HMM trained using Maximum Likelihood.
ml_pred = ml_hmm_predict(Xtest, Xtrain, Htrain)
ml_error = error(ml_pred, Htest)
print("Maximum likelihood error: {}".format(ml_error))

# CP-HMM training and prediction.
cphmm.fit(X, H, lengths)
CP_pred = cphmm.predict(Xtest, SIGNIFICANCE_LEVEL)
CP_error = error(CP_pred[0], Htest)
print("CP error: {}".format(CP_error))
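# The helpers sample_hmm, ml_hmm_predict and error used above are defined
# elsewhere in this script. Judging only from how it is called, error()
# compares a predicted hidden-state sequence against the true one; a minimal
# sketch consistent with that usage (this exact definition is an assumption,
# not the original helper):

import numpy as np

def error(predicted, truth):
    """Fraction of hidden states predicted incorrectly (hypothetical
    stand-in for the script's error() helper)."""
    predicted = np.asarray(predicted)
    truth = np.asarray(truth)
    return float(np.mean(predicted != truth))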
def test_cross_validation(self):
    # -------------------------------------------------------------------------
    # Classification
    # -------------------------------------------------------------------------
    data = load_iris()

    icp = IcpClassifier(
        ClassifierNc(
            ClassifierAdapter(RandomForestClassifier(n_estimators=100)),
            MarginErrFunc()))
    icp_cv = ClassIcpCvHelper(icp)

    scores = cross_val_score(
        icp_cv,
        data.data,
        data.target,
        iterations=5,
        folds=5,
        scoring_funcs=[class_mean_errors, class_avg_c],
        significance_levels=[0.05, 0.1, 0.2],
    )

    print("Classification: iris")
    scores = scores.drop(["fold", "iter"], axis=1)
    print(scores.groupby(["significance"]).mean())

    # -------------------------------------------------------------------------
    # Regression, absolute error
    # -------------------------------------------------------------------------
    data = load_diabetes()

    icp = IcpRegressor(
        RegressorNc(
            RegressorAdapter(RandomForestRegressor(n_estimators=100)),
            AbsErrorErrFunc()))
    icp_cv = RegIcpCvHelper(icp)

    scores = cross_val_score(
        icp_cv,
        data.data,
        data.target,
        iterations=5,
        folds=5,
        scoring_funcs=[reg_mean_errors, reg_median_size],
        significance_levels=[0.05, 0.1, 0.2],
    )

    print("Absolute error regression: diabetes")
    scores = scores.drop(["fold", "iter"], axis=1)
    print(scores.groupby(["significance"]).mean())

    # -------------------------------------------------------------------------
    # Regression, normalized absolute error
    # -------------------------------------------------------------------------
    data = load_diabetes()

    underlying_model = RegressorAdapter(
        RandomForestRegressor(n_estimators=100))
    normalizer_model = RegressorAdapter(
        RandomForestRegressor(n_estimators=100))
    normalizer = RegressorNormalizer(underlying_model, normalizer_model,
                                     AbsErrorErrFunc())
    nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)

    icp = IcpRegressor(nc)
    icp_cv = RegIcpCvHelper(icp)

    scores = cross_val_score(
        icp_cv,
        data.data,
        data.target,
        iterations=5,
        folds=5,
        scoring_funcs=[reg_mean_errors, reg_median_size],
        significance_levels=[0.05, 0.1, 0.2],
    )

    print("Normalized absolute error regression: diabetes")
    scores = scores.drop(["fold", "iter"], axis=1)
    print(scores.groupby(["significance"]).mean())

    # -------------------------------------------------------------------------
    # Regression, signed error
    # -------------------------------------------------------------------------
    data = load_diabetes()

    icp = IcpRegressor(
        RegressorNc(
            RegressorAdapter(RandomForestRegressor(n_estimators=100)),
            SignErrorErrFunc()))
    icp_cv = RegIcpCvHelper(icp)

    scores = cross_val_score(
        icp_cv,
        data.data,
        data.target,
        iterations=5,
        folds=5,
        scoring_funcs=[reg_mean_errors, reg_median_size],
        significance_levels=[0.05, 0.1, 0.2],
    )

    print("Signed error regression: diabetes")
    scores = scores.drop(["fold", "iter"], axis=1)
    print(scores.groupby(["significance"]).mean())

    # -------------------------------------------------------------------------
    # Regression, normalized signed error
    # -------------------------------------------------------------------------
    data = load_diabetes()

    underlying_model = RegressorAdapter(
        RandomForestRegressor(n_estimators=100))
    normalizer_model = RegressorAdapter(
        RandomForestRegressor(n_estimators=100))
    # The normalization model can use a different error function than the
    # one used to measure errors on the underlying model.
    normalizer = RegressorNormalizer(underlying_model, normalizer_model,
                                     AbsErrorErrFunc())
    nc = RegressorNc(underlying_model, SignErrorErrFunc(), normalizer)

    icp = IcpRegressor(nc)
    icp_cv = RegIcpCvHelper(icp)

    scores = cross_val_score(
        icp_cv,
        data.data,
        data.target,
        iterations=5,
        folds=5,
        scoring_funcs=[reg_mean_errors, reg_median_size],
        significance_levels=[0.05, 0.1, 0.2],
    )

    print("Normalized signed error regression: diabetes")
    scores = scores.drop(["fold", "iter"], axis=1)
    print(scores.groupby(["significance"]).mean())
X_test, y_test = X[test_index], y[test_index]

lda = LinearDiscriminantAnalysis(n_components=9)
X_train_lda = lda.fit_transform(X_train, y_train)
X_test_lda = lda.transform(X_test)
x_anomaly_lda = lda.transform(x_anomaly)

x_train, x_cal, y_train, y_cal = train_test_split(X_train_lda,
                                                  y_train,
                                                  test_size=0.3,
                                                  shuffle=False,
                                                  random_state=1)

model = KNeighborsClassifier(n_neighbors=5)

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
icp = IcpClassifier(ClassifierNc(ClassifierAdapter(model)))
icp.fit(x_train, y_train)
icp.calibrate(x_cal, y_cal)

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
SIG = 0.2
prediction = icp.predict(X_test_lda, significance=SIG)

# Size of each conformal prediction set (number of accepted labels).
result = np.sum(prediction, axis=1)
# NOTE: 48 is the hard-coded number of test instances per fold.
zero_sum_correct = (48 - result.sum(axis=0)) / 48
correct.append(zero_sum_correct)
print("Prediction set sizes:")
print(result)

prediction_anomaly = icp.predict(x_anomaly_lda, significance=SIG)
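# One way to read prediction_anomaly: at significance SIG, a row with no
# accepted label is an empty conformal prediction set, which can be taken as
# a rejection of the instance. A minimal sketch of that reading (the
# thresholding below is an illustration, not part of the original script):
anomaly_set_sizes = np.sum(prediction_anomaly, axis=1)
n_flagged = int(np.sum(anomaly_set_sizes == 0))
print("Anomalies flagged at significance {}: {}/{}".format(
    SIG, n_flagged, len(anomaly_set_sizes)))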
def CF_qualitative_validation(self):
    ''' Performs validation for conformal qualitative models. '''

    # Make a copy of the original matrices.
    X = self.X.copy()
    Y = self.Y.copy()

    # Total number of class 0 correct predictions.
    c0_correct_all = 0
    # Total number of class 0 incorrect predictions.
    c0_incorrect_all = 0
    # Total number of class 1 correct predictions.
    c1_correct_all = 0
    # Total number of class 1 incorrect predictions.
    c1_incorrect_all = 0
    # Total number of instances out of the applicability domain.
    not_predicted_all = 0

    info = []
    kf = KFold(n_splits=5, shuffle=True, random_state=46)
    # Copy the Y vector to use it as a template for the predictions.
    Y_pred = copy.copy(Y).tolist()
    try:
        for train_index, test_index in kf.split(X):
            # Generate training and test sets
            X_train, X_test = X[train_index], X[test_index]
            Y_train, Y_test = Y[train_index], Y[test_index]
            # Create the aggregated conformal classifier.
            conformal_pred = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                 MarginErrFunc())),
                BootstrapSampler())
            # Fit the conformal classifier to the data
            conformal_pred.fit(X_train, Y_train)
            # Perform prediction on the test set
            prediction = conformal_pred.predict(
                X_test, self.param.getVal('conformalSignificance'))
            # Assign each prediction to its original index.
            for index, el in enumerate(test_index):
                Y_pred[el] = prediction[index]

        # Iterate over the predictions and check the results
        for i in range(len(Y_pred)):
            real = float(Y[i])
            predicted = Y_pred[i]
            if predicted[0] != predicted[1]:
                if real == 0 and predicted[0]:
                    c0_correct_all += 1
                if real == 0 and predicted[1]:
                    c0_incorrect_all += 1
                if real == 1 and predicted[1]:
                    c1_correct_all += 1
                if real == 1 and predicted[0]:
                    c1_incorrect_all += 1
            else:
                not_predicted_all += 1
    except Exception as e:
        LOG.error(f'Qualitative conformal validation'
                  f' failed with exception: {e}')
        raise e

    # Build the confusion matrix from the aggregated counts.
    self.TN = c0_correct_all
    self.FP = c0_incorrect_all
    self.TP = c1_correct_all
    self.FN = c1_incorrect_all

    info.append(('TP', 'True positives in cross-validation', self.TP))
    info.append(('TN', 'True negatives in cross-validation', self.TN))
    info.append(('FP', 'False positives in cross-validation', self.FP))
    info.append(('FN', 'False negatives in cross-validation', self.FN))

    # Compute sensitivity, specificity and MCC
    try:
        self.sensitivity = (self.TP / (self.TP + self.FN))
    except Exception as e:
        LOG.error(f'Failed to compute sensitivity with exception: {e}')
        self.sensitivity = '-'
    try:
        self.specificity = (self.TN / (self.TN + self.FP))
    except Exception as e:
        LOG.error(f'Failed to compute specificity with exception: {e}')
        self.specificity = '-'
    try:
        # Compute the Matthews Correlation Coefficient
        self.mcc = (((self.TP * self.TN) - (self.FP * self.FN)) /
                    np.sqrt((self.TP + self.FP) * (self.TP + self.FN) *
                            (self.TN + self.FP) * (self.TN + self.FN)))
    except Exception as e:
        LOG.error(f'Failed to compute the Matthews Correlation Coefficient'
                  f' with exception: {e}')
        self.mcc = '-'

    info.append(('Sensitivity', 'Sensitivity in cross-validation',
                 self.sensitivity))
    info.append(('Specificity', 'Specificity in cross-validation',
                 self.specificity))
    info.append(
        ('MCC', 'Matthews Correlation Coefficient in cross-validation',
         self.mcc))

    try:
        # Compute coverage (% of compounds inside the applicability domain)
        self.conformal_coverage = (
            self.TN + self.FP + self.TP + self.FN) / (
                (self.TN + self.FP + self.TP + self.FN) + not_predicted_all)
    except Exception as e:
        LOG.error(f'Failed to compute conformal coverage with'
                  f' exception: {e}')
        self.conformal_coverage = '-'

    try:
        # Compute accuracy (% of correct predictions)
        self.conformal_accuracy = (
            float(self.TN + self.TP) /
            float(self.FP + self.FN + self.TN + self.TP))
    except Exception as e:
        LOG.error(f'Failed to compute conformal accuracy with'
                  f' exception: {e}')
        self.conformal_accuracy = '-'

    info.append(('Conformal_coverage', 'Conformal coverage',
                 self.conformal_coverage))
    info.append(('Conformal_accuracy', 'Conformal accuracy',
                 self.conformal_accuracy))

    results = {}
    results['quality'] = info
    # results['classes'] = prediction
    return True, results
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc

data = load_iris()
x, y = data.data, data.target

# Relabel the classes as 0, 1, ... (a no-op for iris, whose targets are
# already 0..2).
for i, y_ in enumerate(np.unique(y)):
    y[y == y_] = i

n_instances = y.size
idx = np.random.permutation(n_instances)

train_idx = idx[:int(n_instances / 3)]
cal_idx = idx[int(n_instances / 3):2 * int(n_instances / 3)]
test_idx = idx[2 * int(n_instances / 3):]

nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier(n_estimators=100)))
icp = IcpClassifier(nc)
icp.fit(x[train_idx, :], y[train_idx])
icp.calibrate(x[cal_idx, :], y[cal_idx])

print(
    pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                 columns=['Label', 'Confidence', 'Credibility']))
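# predict_conf returns a forced point prediction with its confidence and
# credibility; the same calibrated ICP can also produce prediction sets at a
# fixed significance level. A minimal follow-on sketch (the 0.1 level is an
# arbitrary choice for illustration):
prediction = icp.predict(x[test_idx, :], significance=0.1)
# Boolean region matrix: entry [i, c] is True if class c is included in the
# prediction set of test instance i.
print(pd.DataFrame(prediction, columns=['c0', 'c1', 'c2']))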
classification_method = DecisionTreeClassifier()
file_name = 'decision_tree.xls'

ACP_Random = []
ACP_Cross = []
ACP_Boot = []
CCP = []
BCP = []

# -----------------------------------------------------------------------------
# Define models
# -----------------------------------------------------------------------------
models = {
    'ACP-RandomSubSampler': AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))),
        RandomSubSampler()),
    'ACP-CrossSampler': AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))),
        CrossSampler()),
    'ACP-BootstrapSampler': AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))),
        BootstrapSampler()),
    'CCP': CrossConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method)))),
    'BCP': BootstrapConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method)))),
}
# --------------------------------------------------------------------------------------------
# force_prediction
save_path = os.getcwd() + '/force_summary/' + framework_name + '/' + model_name + '/'
if not os.path.exists(save_path):
    os.makedirs(save_path)

s_folder = StratifiedKFold(n_splits=10, shuffle=True)
for index, (train, test) in enumerate(s_folder.split(X, y)):
    x_train, x_test = X[train], X[test]
    y_train, y_test = y[train], y[test]
    truth = y_test.reshape((-1, 1))

    # -----------------------------------------------
    # BCP
    conformal_model = BootstrapConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(simple_model))),
        n_models=10)
    conformal_model.fit(x_train, y_train)

    # ------------------------------------------
    # ICP
    # x_train_sp, x_cal, y_train_sp, y_cal = train_test_split(
    #     x_train, y_train, test_size=0.3, shuffle=True, random_state=1)
    # nc = NcFactory.create_nc(model=simple_model)
    # conformal_model = IcpClassifier(nc)
    # conformal_model.fit(x_train_sp, y_train_sp)
    # conformal_model.calibrate(x_cal, y_cal)

    # ---------------------------------------------------
    # CP
    # nc = NcFactory.create_nc(model=simple_model)
from nonconformist.evaluation import class_mean_errors

# -----------------------------------------------------------------------------
# Setup training and test indices
# -----------------------------------------------------------------------------
data = load_iris()

idx = np.random.permutation(data.target.size)
train = idx[:int(idx.size / 2)]
test = idx[int(idx.size / 2):]

# -----------------------------------------------------------------------------
# Train TCP (a transductive predictor needs no separate calibration set)
# -----------------------------------------------------------------------------
tcp = TcpClassifier(
    ClassifierNc(ClassifierAdapter(SVC(probability=True, gamma='scale')),
                 MarginErrFunc()))
tcp.fit(data.data[train, :], data.target[train])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
prediction = tcp.predict(data.data[test, :], significance=0.1)
header = np.array(['c0', 'c1', 'c2', 'Truth'])
table = np.vstack([prediction.T, data.target[test]]).T
df = pd.DataFrame(np.vstack([header, table]))
print('TCP')
print('---')
print(df)

error_rate = class_mean_errors(tcp.predict(data.data[test, :]),
                               data.target[test],
                               significance=0.1)
print('Error rate: {}'.format(error_rate))
idx = np.random.permutation(data.target.size)
train = idx[:int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]
truth = data.target[test].reshape(-1, 1)
columns = ["C-{}".format(i) for i in np.unique(data.target)] + ["truth"]
significance = 0.1

# -----------------------------------------------------------------------------
# Define models
# -----------------------------------------------------------------------------
models = {
    "ACP-RandomSubSampler": AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))),
        RandomSubSampler(),
    ),
    "ACP-CrossSampler": AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))),
        CrossSampler(),
    ),
    "ACP-BootstrapSampler": AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))),
        BootstrapSampler(),
    ),
    "CCP": CrossConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier())))
    ),
    "BCP": BootstrapConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier())))
    ),
}
simple_model = RandomForestClassifier(n_estimators=500, criterion='entropy')
model_name = "RF(500)"
# simple_model = KNeighborsClassifier(n_neighbors=1)
# model_name = '1NN'
# simple_model = SVC(C=6000.0, gamma=0.001, probability=True)
# model_name = "SVM(6000,0.001)"

# -----------------------------------------------------------------------------
# Define models
# -----------------------------------------------------------------------------
models = {
    'ACP-RandomSubSampler':
        AggregatedCp(IcpClassifier(ClassifierNc(ClassifierAdapter(simple_model))),
                     RandomSubSampler()),
    'ACP-CrossSampler':
        AggregatedCp(IcpClassifier(ClassifierNc(ClassifierAdapter(simple_model))),
                     CrossSampler()),
    'ACP-BootstrapSampler':
        AggregatedCp(IcpClassifier(ClassifierNc(ClassifierAdapter(simple_model))),
                     BootstrapSampler()),
    'CCP':
        CrossConformalClassifier(
            IcpClassifier(ClassifierNc(ClassifierAdapter(simple_model)))),
    'BCP':
        BootstrapConformalClassifier(
            IcpClassifier(ClassifierNc(ClassifierAdapter(simple_model)))),
}

error_summary = []
def CF_qualitative_validation(self):
    ''' Performs validation for conformal qualitative models. '''

    # Make a copy of the original matrices.
    X = self.X.copy()
    Y = self.Y.copy()

    # Random seeds for the external validations of the
    # aggregated conformal estimator.
    seeds = [5, 7, 35]
    # Class 0 correct predictions, one count per seed.
    c0_correct_all = []
    # Class 0 incorrect predictions.
    c0_incorrect_all = []
    # Class 1 correct predictions.
    c1_correct_all = []
    # Class 1 incorrect predictions.
    c1_incorrect_all = []
    # Instances out of the applicability domain.
    not_predicted_all = []

    results = []
    # Iterate over the seeds.
    try:
        for i in range(len(seeds)):
            # Generate training and test sets
            X_train, X_test, \
                Y_train, Y_test = train_test_split(X, Y,
                                                   test_size=0.25,
                                                   random_state=seeds[i],
                                                   shuffle=True)
            # Create the aggregated conformal classifier.
            conformal_pred = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator),
                                 MarginErrFunc())),
                BootstrapSampler())
            # Fit the conformal classifier to the data
            conformal_pred.fit(X_train, Y_train)
            # Perform prediction on the test set
            prediction = conformal_pred.predict(X_test,
                                                self.conformalSignificance)

            c0_correct = 0
            c1_correct = 0
            not_predicted = 0
            c0_incorrect = 0
            c1_incorrect = 0

            # Iterate over the predictions and check the results
            for j in range(len(Y_test)):
                real = float(Y_test[j])
                predicted = prediction[j]
                if predicted[0] != predicted[1]:
                    if real == 0 and predicted[0]:
                        c0_correct += 1
                    if real == 0 and predicted[1]:
                        c0_incorrect += 1
                    if real == 1 and predicted[1]:
                        c1_correct += 1
                    if real == 1 and predicted[0]:
                        c1_incorrect += 1
                else:
                    not_predicted += 1

            # Add the results to the lists.
            c0_correct_all.append(c0_correct)
            c0_incorrect_all.append(c0_incorrect)
            c1_correct_all.append(c1_correct)
            c1_incorrect_all.append(c1_incorrect)
            not_predicted_all.append(not_predicted)
    except Exception as e:
        LOG.error(f'Qualitative conformal validation'
                  f' failed with exception: {e}')
        raise e

    # Get the mean confusion matrix across the seeds.
    self.TN = int(np.mean(c0_correct_all))
    self.FP = int(np.mean(c0_incorrect_all))
    self.TP = int(np.mean(c1_correct_all))
    self.FN = int(np.mean(c1_incorrect_all))
    not_predicted_all = int(np.mean(not_predicted_all))

    results.append(('TP', 'True positives in cross-validation', self.TP))
    results.append(('TN', 'True negatives in cross-validation', self.TN))
    results.append(('FP', 'False positives in cross-validation', self.FP))
    results.append(('FN', 'False negatives in cross-validation', self.FN))

    # Compute sensitivity and specificity
    self.sensitivity = (self.TP / (self.TP + self.FN))
    self.specificity = (self.TN / (self.TN + self.FP))

    # Compute the Matthews Correlation Coefficient
    self.mcc = (((self.TP * self.TN) - (self.FP * self.FN)) /
                np.sqrt((self.TP + self.FP) * (self.TP + self.FN) *
                        (self.TN + self.FP) * (self.TN + self.FN)))

    results.append(('Sensitivity', 'Sensitivity in cross-validation',
                    self.sensitivity))
    results.append(('Specificity', 'Specificity in cross-validation',
                    self.specificity))
    results.append(
        ('MCC', 'Matthews Correlation Coefficient in cross-validation',
         self.mcc))

    # Compute coverage (% of compounds inside the applicability domain)
    self.conformal_coverage = (self.TN + self.FP + self.TP + self.FN) / (
        (self.TN + self.FP + self.TP + self.FN) + not_predicted_all)

    # Compute accuracy (% of correct predictions)
    self.conformal_accuracy = float(self.TN + self.TP) / float(
        self.FP + self.FN + self.TN + self.TP)

    results.append(('Conformal_coverage', 'Conformal coverage',
                    self.conformal_coverage))
    results.append(('Conformal_accuracy', 'Conformal accuracy',
                    self.conformal_accuracy))

    return True, (results, )
# -----------------------------------------------------------------
# Prediction with significance
s_folder = StratifiedKFold(n_splits=10, shuffle=True)
for k, (train, test) in enumerate(s_folder.split(X, y)):
    x_train, x_test = X[train], X[test]
    y_train, y_test = y[train], y[test]
    truth = y_test.reshape((-1, 1))

    lda = LinearDiscriminantAnalysis(n_components=9)
    x_train_lda = lda.fit_transform(x_train, y_train)
    x_test_lda = lda.transform(x_test)

    model = CrossConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(simple_model))))
    model.fit(x_train_lda, y_train)

    # significance=None returns the raw p-values; the scoring functions
    # below apply the chosen significance level themselves.
    prediction = model.predict(x_test_lda, significance=None)
    table = np.hstack((prediction, truth))

    result = [
        1 - class_mean_errors(prediction, truth, significance=significance),
        class_avg_c(prediction, truth, significance=significance)
    ]
    if k == 0:
        summary = result
    else:
        summary = np.vstack((summary, result))

    print('\nCCP')
    print('Accuracy: {}'.format(result[0]))
    print('Average count: {}'.format(result[1]))
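# After the loop, summary holds one [accuracy, average set size] row per
# fold, so the per-fold results can be averaged. A minimal sketch (this
# aggregation step is an addition, not in the original):
summary = np.asarray(summary)
print('\nCCP, mean over {} folds'.format(summary.shape[0]))
print('Accuracy: {:.3f}'.format(summary[:, 0].mean()))
print('Average count: {:.3f}'.format(summary[:, 1].mean()))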
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

from nonconformist.base import OobClassifierAdapter
from nonconformist.icp import OobCpClassifier
from nonconformist.nc import ClassifierNc, RegressorNc
from nonconformist.evaluation import cross_val_score
from nonconformist.evaluation import ClassIcpCvHelper, RegIcpCvHelper
from nonconformist.evaluation import class_avg_c, class_mean_errors
from nonconformist.evaluation import reg_mean_errors, reg_median_size

# -----------------------------------------------------------------------------
# Classification
# -----------------------------------------------------------------------------
data = load_iris()

icp = OobCpClassifier(
    ClassifierNc(
        OobClassifierAdapter(
            RandomForestClassifier(n_estimators=100, oob_score=True))))
icp_cv = ClassIcpCvHelper(icp)

scores = cross_val_score(
    icp_cv,
    data.data,
    data.target,
    iterations=5,
    folds=5,
    scoring_funcs=[class_mean_errors, class_avg_c],
    significance_levels=[0.05, 0.1, 0.2],
)

print("Classification: iris")
def build(self):
    '''Build a new RF model with the X and Y numpy matrices.'''

    # Make a copy of the data matrices
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))
    results.append(('model', 'model type', 'RF'))

    conformal = self.param.getVal('conformal')

    # If tune, then call gridsearch to optimize the estimator
    if self.param.getVal('tune'):
        LOG.info("Optimizing RF estimator")
        try:
            # Check the type of model
            if self.param.getVal('quantitative'):
                self.estimator = RandomForestRegressor(
                    **self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                # results.append(('model', 'model type', 'RF quantitative (optimized)'))
            else:
                self.estimator = RandomForestClassifier(
                    **self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                # results.append(('model', 'model type', 'RF qualitative (optimized)'))
        except Exception as e:
            return False, f'Exception optimizing RF estimator: {e}'
    else:
        try:
            if self.param.getVal('quantitative'):
                self.estimator = RandomForestRegressor(
                    **self.estimator_parameters)
                if not conformal:
                    LOG.info("Building Quantitative RF model")
                    # results.append(('model', 'model type', 'RF quantitative'))
            else:
                self.estimator = RandomForestClassifier(
                    **self.estimator_parameters)
                if not conformal:
                    LOG.info("Building Qualitative RF model")
                    # results.append(('model', 'model type', 'RF qualitative'))
            self.estimator.fit(X, Y)
        except Exception as e:
            return False, f'Exception building RF estimator: {e}'

    if not conformal:
        return True, results

    self.estimator_temp = copy(self.estimator)

    # Create the conformal estimator
    try:
        # Conformal regressor
        if self.param.getVal('quantitative'):
            conformal_settings = self.param.getDict('conformal_settings')
            LOG.info("Building conformal Quantitative RF model")

            underlying_model = RegressorAdapter(self.estimator_temp)
            self.normalizing_model = RegressorAdapter(
                KNeighborsRegressor(
                    n_neighbors=conformal_settings['KNN_NN']))
            # normalizing_model = RegressorAdapter(self.estimator_temp)
            normalizer = RegressorNormalizer(underlying_model,
                                             copy(self.normalizing_model),
                                             AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)

            # self.conformal_pred = AggregatedCp(
            #     IcpRegressor(RegressorNc(RegressorAdapter(self.estimator))),
            #     BootstrapSampler())
            self.estimator = AggregatedCp(IcpRegressor(nc),
                                          BootstrapSampler())
            self.estimator.fit(X, Y)
            # results.append(('model', 'model type', 'conformal RF quantitative'))

        # Conformal classifier
        else:
            LOG.info("Building conformal Qualitative RF model")
            self.estimator = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                 MarginErrFunc())),
                BootstrapSampler())
            # Fit the estimator to the data
            self.estimator.fit(X, Y)
            # results.append(('model', 'model type', 'conformal RF qualitative'))

    except Exception as e:
        return False, f'Exception building conformal RF estimator: {e}'

    return True, results

## Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values '''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters '''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y '''
idx = np.random.permutation(data.target.size)
train = idx[:int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]
truth = data.target[test].reshape(-1, 1)
columns = ['C-{}'.format(i) for i in np.unique(data.target)] + ['truth']
significance = 0.1

# -----------------------------------------------------------------------------
# Define models
# -----------------------------------------------------------------------------
models = {
    'ACP-RandomSubSampler': AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier()))),
        RandomSubSampler()),
    'ACP-CrossSampler': AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier()))),
        CrossSampler()),
    'ACP-BootstrapSampler': AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier()))),
        BootstrapSampler()),
    'CCP': CrossConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier())))),
    'BCP': BootstrapConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier())))),
}
from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc, MarginErrFunc

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()

idx = np.random.permutation(data.target.size)
train = idx[:int(idx.size / 3)]
calibrate = idx[int(idx.size / 3):int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
icp = IcpClassifier(
    ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()),
                 MarginErrFunc()))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
prediction = icp.predict(data.data[test, :], significance=0.1)
header = np.array(["c0", "c1", "c2", "Truth"])
table = np.vstack([prediction.T, data.target[test]]).T
df = pd.DataFrame(np.vstack([header, table]))
print(df)
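# To check the empirical validity of the 0.1-significance sets above, the
# observed error rate can be compared with the nominal level, mirroring how
# class_mean_errors is used in the other examples (this evaluation step is an
# addition, not part of the original snippet):
from nonconformist.evaluation import class_mean_errors

# Fraction of test instances whose prediction set misses the true label;
# for a well-calibrated ICP this should be close to 0.1.
error_rate = class_mean_errors(prediction, data.target[test],
                               significance=0.1)
print('Error rate: {}'.format(error_rate))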
def build(self):
    '''Build a new RF model with the X and Y numpy matrices.'''

    if self.failed:
        return False

    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    if self.cv:
        self.cv = getCrossVal(self.cv,
                              self.estimator_parameters["random_state"],
                              self.n, self.p)

    if self.tune:
        if self.quantitative:
            self.optimize(X, Y, RandomForestRegressor(),
                          self.tune_parameters)
            results.append(
                ('model', 'model type', 'RF quantitative (optimized)'))
        else:
            self.optimize(X, Y, RandomForestClassifier(),
                          self.tune_parameters)
            results.append(
                ('model', 'model type', 'RF qualitative (optimized)'))
    else:
        if self.quantitative:
            log.info("Building Quantitative RF model")
            self.estimator_parameters.pop('class_weight', None)
            self.estimator = RandomForestRegressor(
                **self.estimator_parameters)
            results.append(('model', 'model type', 'RF quantitative'))
        else:
            log.info("Building Qualitative RF model")
            self.estimator = RandomForestClassifier(
                **self.estimator_parameters)
            results.append(('model', 'model type', 'RF qualitative'))

    if self.conformal:
        if self.quantitative:
            underlying_model = RegressorAdapter(self.estimator)
            # normalizing_model = RegressorAdapter(
            #     KNeighborsRegressor(n_neighbors=5))
            normalizing_model = RegressorAdapter(self.estimator)
            normalizer = RegressorNormalizer(underlying_model,
                                             normalizing_model,
                                             AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                             normalizer)
            # self.conformal_pred = AggregatedCp(
            #     IcpRegressor(RegressorNc(RegressorAdapter(self.estimator))),
            #     BootstrapSampler())
            self.conformal_pred = AggregatedCp(IcpRegressor(nc),
                                               BootstrapSampler())
            self.conformal_pred.fit(X, Y)
            # overrides non-conformal
            results.append(
                ('model', 'model type', 'conformal RF quantitative'))
        else:
            self.conformal_pred = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator),
                                 MarginErrFunc())),
                BootstrapSampler())
            self.conformal_pred.fit(X, Y)
            # overrides non-conformal
            results.append(
                ('model', 'model type', 'conformal RF qualitative'))

    self.estimator.fit(X, Y)

    return True, results

#### Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values '''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters '''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y '''
def build(self):
    '''Build a new SVM model with the X and Y numpy matrices.'''

    # Make a copy of the data matrices
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    # If tune, then call gridsearch to optimize the estimator
    if self.param.getVal('tune'):
        try:
            # Check the type of model
            if self.param.getVal('quantitative'):
                self.optimize(X, Y, svm.SVR(), self.tune_parameters)
                results.append(
                    ('model', 'model type', 'SVM quantitative (optimized)'))
            else:
                self.optimize(X, Y, svm.SVC(probability=True),
                              self.tune_parameters)
                results.append(
                    ('model', 'model type', 'SVM qualitative (optimized)'))
            LOG.debug('SVM estimator optimized')
        except Exception as e:
            LOG.error(f'Exception optimizing SVM estimator: {e}')
    else:
        try:
            LOG.info("Building SVM model")
            if self.param.getVal('quantitative'):
                LOG.info("Building Quantitative SVM-R model")
                self.estimator = svm.SVR(**self.estimator_parameters)
                results.append(('model', 'model type', 'SVM quantitative'))
            else:
                self.estimator = svm.SVC(**self.estimator_parameters)
                results.append(('model', 'model type', 'SVM qualitative'))
        except Exception as e:
            LOG.error(f'Exception building SVM estimator: {e}')

    # Fit the estimator to the data
    self.estimator.fit(X, Y)
    self.estimator_temp = copy(self.estimator)

    if self.param.getVal('conformal'):
        try:
            LOG.info("Building aggregated conformal SVM model")
            if self.param.getVal('quantitative'):
                underlying_model = RegressorAdapter(self.estimator_temp)
                # normalizing_model = RegressorAdapter(
                #     KNeighborsRegressor(n_neighbors=5))
                normalizing_model = RegressorAdapter(self.estimator_temp)
                normalizer = RegressorNormalizer(underlying_model,
                                                 normalizing_model,
                                                 AbsErrorErrFunc())
                nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                 normalizer)
                # self.conformal_pred = AggregatedCp(
                #     IcpRegressor(RegressorNc(RegressorAdapter(self.estimator))),
                #     BootstrapSampler())
                self.estimator = AggregatedCp(IcpRegressor(nc),
                                              BootstrapSampler())
                self.estimator.fit(X, Y)  # overrides the non-conformal estimator
                results.append(
                    ('model', 'model type', 'conformal SVM quantitative'))
            else:
                self.estimator = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                     MarginErrFunc())),
                    BootstrapSampler())
                self.estimator.fit(X, Y)  # overrides the non-conformal estimator
                results.append(
                    ('model', 'model type', 'conformal SVM qualitative'))
        except Exception as e:
            LOG.error(f'Exception building aggregated conformal SVM '
                      f'estimator: {e}')

    return True, results
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc

data = load_iris()
x, y = data.data, data.target

# Relabel the classes as 0, 1, ... (a no-op for iris, whose targets are
# already 0..2).
for i, y_ in enumerate(np.unique(y)):
    y[y == y_] = i

n_instances = y.size
idx = np.random.permutation(n_instances)

train_idx = idx[:int(n_instances / 3)]
cal_idx = idx[int(n_instances / 3):2 * int(n_instances / 3)]
test_idx = idx[2 * int(n_instances / 3):]

nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier()))
icp = IcpClassifier(nc)
icp.fit(x[train_idx, :], y[train_idx])
icp.calibrate(x[cal_idx, :], y[cal_idx])

print(
    pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                 columns=['Label', 'Confidence', 'Credibility']))