def build(self):
    '''Build a new qualitative GNB model with the X and Y numpy matrices'''
    # Work on copies so the stored data matrices are never mutated.
    X = self.X.copy()
    Y = self.Y.copy()

    results = [
        ('nobj', 'number of objects', self.nobj),
        ('nvarx', 'number of predictor variables', self.nvarx),
    ]

    # Fit the plain Gaussian Naive Bayes estimator.
    LOG.info('Building GaussianNB model')
    self.estimator = GaussianNB(**self.estimator_parameters)
    results.append(('model', 'model type', 'GNB qualitative'))
    self.estimator.fit(X, Y)

    # Non-conformal models are done at this point.
    if not self.param.getVal('conformal'):
        return True, results

    # Conformal variant: keep the fitted base estimator aside and replace
    # self.estimator with an aggregated (bootstrap) conformal classifier
    # built on top of it, using a margin nonconformity function.
    self.estimator_temp = copy(self.estimator)
    self.estimator = AggregatedCp(
        IcpClassifier(
            ClassifierNc(ClassifierAdapter(self.estimator_temp),
                         MarginErrFunc())),
        BootstrapSampler())
    self.estimator.fit(X, Y)
    results.append(('model', 'model type', 'conformal GNB qualitative'))
    return True, results
def CF_QualVal(X, Y, estimator, conformalSignificance):
    """Qualitative conformal predictor validation.

    Runs a leave-one-out validation of an aggregated (bootstrap) conformal
    classifier built around *estimator*, predicting class regions at the
    requested significance level and printing class-wise error statistics.

    Parameters:
        X, Y: predictor matrix and labels (array-like).
        estimator: fitted-or-fittable scikit-learn style classifier.
        conformalSignificance: significance level for region prediction;
            falls back to 0.15 when None.

    Returns the (last fitted) aggregated conformal classifier.
    """
    print("Starting qualitative conformal prediction validation")

    # BUG FIX: the significance argument used to be ignored and a
    # hard-coded 0.15 was used for both prediction and evaluation.
    # Honour the caller's value, keeping 0.15 as the fallback.
    significance = 0.15 if conformalSignificance is None else conformalSignificance

    icp = AggregatedCp(
        IcpClassifier(
            ClassifierNc(ClassifierAdapter(estimator), MarginErrFunc())),
        BootstrapSampler())

    Y = np.asarray(Y).reshape(-1, 1)
    loo = LeaveOneOut()
    predictions = []
    for train, test in loo.split(X):
        # Center and scale using only the training fold.
        Xn = [X[i] for i in train]
        Yn = [Y[i] for i in train]
        Xn, mux = center(Xn)
        Xn, wgx = scale(Xn, True)
        Yn = np.asarray(Yn)
        # NOTE(review): the held-out sample is NOT centered/scaled with
        # mux/wgx before prediction, while the model was fitted on scaled
        # data -- verify whether this is intentional.
        Xout = X[test]
        icp.fit(Xn, Yn)
        predictions.append(icp.predict(Xout, significance=significance))

    predictions = [(x[0]).tolist() for x in predictions]
    predictions = np.asarray(predictions)
    print('Error rate: {}'.format(
        class_mean_errors(predictions, Y, significance)))
    print('Class one: ', class_one_c(predictions, Y, significance))
    return icp
def test_icp_classification_tree(self):
    """Train, calibrate and predict with an inductive conformal tree on iris."""
    # -- split iris into train / calibration / test thirds -------------------
    data = load_iris()
    idx = np.random.permutation(data.target.size)
    third = int(idx.size / 3)
    train = idx[:third]
    calibrate = idx[third:int(2 * idx.size / 3)]
    test = idx[int(2 * idx.size / 3):]

    # -- train and calibrate -------------------------------------------------
    icp = IcpClassifier(
        ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()),
                     MarginErrFunc()))
    icp.fit(data.data[train, :], data.target[train])
    icp.calibrate(data.data[calibrate, :], data.target[calibrate])

    # -- predict at 10% significance and tabulate against the truth ----------
    prediction = icp.predict(data.data[test, :], significance=0.1)
    header = np.array(["c0", "c1", "c2", "Truth"])
    table = np.vstack([prediction.T, data.target[test]]).T
    df = pd.DataFrame(np.vstack([header, table]))
    print(df)
def CF_QualCal(X, Y, estimator):
    """Qualitative conformal predictor calibration.

    Wraps *estimator* in an aggregated (bootstrap) inductive conformal
    classifier with a margin nonconformity function, fits it on X/Y and
    returns the fitted aggregated predictor.
    """
    nonconformity = ClassifierNc(ClassifierAdapter(estimator), MarginErrFunc())
    acp = AggregatedCp(IcpClassifier(nonconformity), BootstrapSampler())
    acp.fit(X, Y)
    return acp
def SelectLabeled(self, labeled_data_x, labeled_data_y, unlabeled_data_x):
    """Grow the labeled pool with confidently self-labeled unlabeled samples.

    Fits an aggregated conformal classifier on the current labeled pool and
    moves unlabeled samples that pass the confidence/credibility thresholds
    into the labeled set (labels taken from argmax of the p-values).

    Returns (newly_labeled_x, newly_labeled_y, remaining_unlabeled_x).
    """
    # just append train data to labeled data
    labeled_x = np.concatenate(
        (self.init_labeled_data_x, labeled_data_x
         )) if len(labeled_data_x) > 0 else self.init_labeled_data_x
    # NOTE(review): both branches test len(labeled_data_x); presumably
    # labeled_data_x and labeled_data_y always have equal length -- verify.
    labeled_y = np.concatenate(
        (self.init_labeled_data_y, labeled_data_y
         )) if len(labeled_data_x) > 0 else self.init_labeled_data_y
    #
    # create model to predict with confidence and credibility
    model = ClassifierAdapter(
        DecisionTreeClassifier(random_state=config.random_state,
                               min_samples_leaf=config.min_samples_leaf))
    model_acp = AggregatedCp(
        IcpClassifier(ClassifierNc(model), smoothing=True),
        RandomSubSampler())
    model_acp.fit(labeled_x, labeled_y)
    # s: per-sample p-values array; rows are samples, columns are classes.
    s = model_acp.predict(unlabeled_data_x)
    # print(s)
    #
    # selection method
    # NOTE(review): in conformal prediction credibility is usually the
    # largest p-value and confidence 1 minus the second largest; here
    # 1 - a.min() is compared against config.confidence and a.max()
    # against config.credibility -- confirm the intended mapping (a
    # sibling implementation in this file uses the opposite pairing).
    labeled_ind = [
        i for i, a in enumerate(s)
        if 1 - a.min() > config.confidence and a.max() > config.credibility
    ]
    unlabeled_ind = [
        i for i, a in enumerate(s)
        if 1 - a.min() < config.confidence or a.max() < config.credibility
    ]
    # Selected samples get the class with the highest p-value as label;
    # the rest stay in the unlabeled pool.
    labeled_unlabeled_x, labeled_unlabeled_y, unlabeled_data_x = \
        np.take(unlabeled_data_x, labeled_ind, axis=0), np.take(s.argmax(axis=1), labeled_ind), np.take(
            unlabeled_data_x, unlabeled_ind, axis=0)
    #
    return labeled_unlabeled_x, labeled_unlabeled_y, unlabeled_data_x
def SelectLabeled(self, labeled_data_x, labeled_data_y, unlabeled_data_x):
    """Grow the labeled pool via a cross-conformal classifier.

    Variant of the selection step using CrossConformalClassifier instead of
    an aggregated predictor; otherwise the flow mirrors the sibling
    implementation: fit on the labeled pool, predict p-values on the
    unlabeled pool, and promote samples passing the thresholds.

    Returns (newly_labeled_x, newly_labeled_y, remaining_unlabeled_x).
    """
    # just append train data to labeled data
    labeled_x = np.concatenate((self.init_labeled_data_x, labeled_data_x)) \
        if len(labeled_data_x) > 0 else self.init_labeled_data_x
    # NOTE(review): both branches test len(labeled_data_x); presumably
    # labeled_data_x and labeled_data_y always have equal length -- verify.
    labeled_y = np.concatenate((self.init_labeled_data_y, labeled_data_y)) \
        if len(labeled_data_x) > 0 else self.init_labeled_data_y
    #
    # create model to predict with confidence and credibility
    model = ClassifierAdapter(
        DecisionTreeClassifier(random_state=config.random_state,
                               min_samples_leaf=config.min_samples_leaf))
    nc = ClassifierNc(model, MarginErrFunc())
    model_icp = IcpClassifier(nc, smoothing=True)
    model_ccp = CrossConformalClassifier(model_icp)
    model_ccp.fit(labeled_x, labeled_y)
    # s: per-sample p-values array; rows are samples, columns are classes.
    s = model_ccp.predict(unlabeled_data_x)
    # print(s)
    #
    # selection method
    # NOTE(review): here a.max() is compared against config.confidence and
    # 1 - a.min() against config.credibility -- the opposite pairing of the
    # sibling implementation in this file; confirm which mapping of
    # confidence/credibility thresholds is intended.
    labeled_ind = [
        i for i, a in enumerate(s)
        if a.max() > config.confidence and 1 - a.min() > config.credibility
    ]
    unlabeled_ind = [
        i for i, a in enumerate(s)
        if a.max() < config.confidence or 1 - a.min() < config.credibility
    ]
    # Selected samples get the class with the highest p-value as label;
    # the rest stay in the unlabeled pool.
    labeled_unlabeled_x, labeled_unlabeled_y, unlabeled_data_x = \
        np.take(unlabeled_data_x, labeled_ind, axis=0), np.take(s.argmax(axis=1), labeled_ind), np.take(
            unlabeled_data_x, unlabeled_ind, axis=0)
    #
    return labeled_unlabeled_x, labeled_unlabeled_y, unlabeled_data_x
def build(self):
    '''Build a new qualitative GNB model with the X and Y numpy matrices

    Returns (True, results) on success or (False, message) on failure.
    '''
    # Bail out early if model initialisation already failed.
    if self.failed:
        return False, "Error initiating model"
    # Work on copies so the stored matrices are never mutated.
    X = self.X.copy()
    Y = self.Y.copy()
    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))
    if self.cv:
        # NOTE(review): 46 is presumably a fixed random seed for the
        # cross-validation generator -- confirm against getCrossVal.
        self.cv = getCrossVal(self.cv, 46, self.n, self.p)
    if self.quantitative:
        # Gaussian Naive Bayes is a classifier; reject regression data.
        print("GNB only applies to qualitative data")
        return False, "GNB only applies to qualitative data"
    else:
        print("Building GaussianNB model")
        print(self.estimator_parameters)
        self.estimator = GaussianNB(**self.estimator_parameters)
        results.append(('model', 'model type', 'GNB qualitative'))
    if self.conformal:
        # Wrap the (still unfitted) estimator in an aggregated conformal
        # classifier; AggregatedCp fits its own copies during fit().
        self.conformal_pred = AggregatedCp(
            IcpClassifier(
                ClassifierNc(ClassifierAdapter(self.estimator),
                             MarginErrFunc())),
            BootstrapSampler())
        self.conformal_pred.fit(X, Y)
        # overrides non-conformal
        results.append(
            ('model', 'model type', 'conformal GNB qualitative'))
    # The plain estimator is always fitted, even in the conformal case.
    self.estimator.fit(X, Y)
    return True, results
def test_confidence_credibility(self):
    """Print per-sample confidence/credibility for ICP predictions on iris."""
    data = load_iris()
    x, y = data.data, data.target

    # Relabel classes as consecutive integers starting at 0.
    for i, y_ in enumerate(np.unique(y)):
        y[y == y_] = i

    # Random train / calibration / test split in thirds.
    n_instances = y.size
    idx = np.random.permutation(n_instances)
    third = int(n_instances / 3)
    train_idx = idx[:third]
    cal_idx = idx[third:2 * third]
    test_idx = idx[2 * third:]

    # Fit and calibrate an ICP over a random forest.
    nonconformity = ClassifierNc(ClassifierAdapter(RandomForestClassifier()))
    icp = IcpClassifier(nonconformity)
    icp.fit(x[train_idx, :], y[train_idx])
    icp.calibrate(x[cal_idx, :], y[cal_idx])

    # Tabulate point prediction with confidence and credibility.
    print(
        pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                     columns=["Label", "Confidence", "Credibility"]))
from sklearn.datasets import load_iris
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ProbEstClassifierNc, margin

# NOTE(review): np, pd and DecisionTreeClassifier are used below but not
# imported in this chunk -- presumably imported earlier in the file; verify.

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()

idx = np.random.permutation(data.target.size)
train = idx[: int(idx.size / 3)]
calibrate = idx[int(idx.size / 3) : int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3) :]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
# Margin-based probability-estimate nonconformity over a decision tree.
icp = IcpClassifier(ProbEstClassifierNc(DecisionTreeClassifier(), margin))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
# Region prediction at 10% significance; columns c0..c2 hold the per-class
# inclusion flags, with the true label appended for inspection.
prediction = icp.predict(data.data[test, :], significance=0.1)
header = np.array(["c0", "c1", "c2", "Truth"])
table = np.vstack([prediction.T, data.target[test]]).T
df = pd.DataFrame(np.vstack([header, table]))
print(df)
def build(self):
    '''Build a new XGBOOST model with the X and Y numpy matrices

    Returns (True, results) on success or (False, message) on failure;
    callers rely on this graceful (ok, payload) contract.
    '''
    # XGBoost is an optional dependency; fail gracefully if absent.
    try:
        from xgboost.sklearn import XGBClassifier
        from xgboost.sklearn import XGBRegressor
    except Exception as e:
        return False, 'XGboost not found, please revise your environment'

    # Work on copies so the stored matrices are never mutated.
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    # If tune then call gridsearch to optimize the estimator
    if self.param.getVal('tune'):
        LOG.info("Optimizing XGBOOST estimator")
        try:
            # Check type of model
            if self.param.getVal('quantitative'):
                self.estimator = XGBRegressor(
                    **self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                results.append(('model', 'model type',
                                'XGBOOST quantitative (optimized)'))
            else:
                self.estimator = XGBClassifier(
                    **self.estimator_parameters)
                # Removed dead code: a params dict with num_class=2 was
                # built here but never passed to anything.
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                results.append(('model', 'model type',
                                'XGBOOST qualitative (optimized)'))
        except Exception as e:
            return False, f'Exception optimizing XGBOOST estimator with exception {e}'
    else:
        try:
            if self.param.getVal('quantitative'):
                LOG.info("Building Quantitative XGBOOST model")
                self.estimator = XGBRegressor(**self.estimator_parameters)
                results.append(('model', 'model type', 'XGBOOST quantitative'))
            else:
                LOG.info("Building Qualitative XGBOOST model")
                self.estimator = XGBClassifier(**self.estimator_parameters)
                results.append(('model', 'model type', 'XGBOOST qualitative'))
            self.estimator.fit(X, Y)
            print(self.estimator)
        except Exception as e:
            # BUG FIX: the original did `raise e` here, which made the
            # graceful `return False, ...` unreachable and broke the
            # (ok, message) contract of this method.
            return False, f'Exception building XGBOOST estimator with exception {e}'

    self.estimator_temp = copy(self.estimator)
    if not self.param.getVal('conformal'):
        return True, results

    # Create the conformal estimator
    try:
        # Conformal regressor
        if self.param.getVal('quantitative'):
            LOG.info("Building conformal Quantitative XGBOOST model")
            underlying_model = RegressorAdapter(self.estimator_temp)
            # The same estimator copy is reused as the normalizing model.
            normalizing_model = RegressorAdapter(self.estimator_temp)
            normalizer = RegressorNormalizer(
                underlying_model, normalizing_model, AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
            self.estimator = AggregatedCp(IcpRegressor(nc),
                                          BootstrapSampler())
            self.estimator.fit(X, Y)
            results.append(('model', 'model type',
                            'conformal XGBOOST quantitative'))
        # Conformal classifier
        else:
            LOG.info("Building conformal Qualitative XGBOOST model")
            self.estimator = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(
                        ClassifierAdapter(self.estimator_temp),
                        MarginErrFunc())),
                BootstrapSampler())
            # Fit estimator to the data
            self.estimator.fit(X, Y)
            results.append(('model', 'model type',
                            'conformal XGBOOST qualitative'))
    except Exception as e:
        # BUG FIX: unreachable return after `raise e` (see above).
        return False, f'Exception building conformal XGBOOST estimator with exception {e}'

    return True, results

## Overriding of parent methods
# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''
# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''
# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''
# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''
# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''
# def optimize(self, X, Y, estimator, tune_parameters): # ''' optimizes a model using a grid search over a range of values for diverse parameters''' # def regularProject(self, Xb, results): # ''' projects a collection of query objects in a regular model, for obtaining predictions ''' # def conformalProject(self, Xb, results): # ''' projects a collection of query objects in a conformal model, for obtaining predictions ''' # def project(self, Xb, results): # ''' Uses the X matrix provided as argument to predict Y'''
def build(self):
    '''Build a new DL model with the X and Y numpy matrices

    Returns (True, results) on success or (False, message) on failure;
    callers rely on this graceful (ok, payload) contract.
    '''
    # Keras is an optional dependency; fail gracefully if absent.
    try:
        from keras.wrappers.scikit_learn import KerasClassifier
        from keras.wrappers.scikit_learn import KerasRegressor
    except Exception as e:
        return False, 'Keras not found, please revise your environment'

    # Work on copies so the stored matrices are never mutated.
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    # If tune then call gridsearch to optimize the estimator
    if self.param.getVal('tune'):
        LOG.info("Optimizing Keras estimator")
        try:
            # Check type of model
            if self.param.getVal('quantitative'):
                self.estimator = KerasRegressor(
                    **self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                results.append(('model', 'model type',
                                'KERAS quantitative (optimized)'))
            else:
                self.estimator = KerasClassifier(
                    **self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                results.append(('model', 'model type',
                                'KERAS qualitative (optimized)'))
        except Exception as e:
            return False, f'Exception optimizing KERAS estimator with exception {e}'
    else:
        try:
            if self.param.getVal('quantitative'):
                LOG.info("Building Quantitative KERAS mode")
                self.estimator = KerasRegressor(
                    build_fn=self.create_model,
                    **self.estimator_parameters,
                    verbose=0)
                results.append(
                    ('model', 'model type', 'Keras quantitative'))
            else:
                LOG.info("Building Qualitative Keras model")
                # The classifier needs the input dimension to build the net.
                self.estimator = KerasClassifier(
                    build_fn=self.create_model,
                    dim=self.X.shape[1],
                    **self.estimator_parameters,
                    verbose=0)
                results.append(
                    ('model', 'model type', 'Keras qualitative'))
            self.estimator.fit(X, Y)
            print(self.estimator)
        except Exception as e:
            # BUG FIX: the original did `raise e` here, which made the
            # graceful `return False, ...` unreachable and broke the
            # (ok, message) contract of this method.
            return False, f'Exception building Keras estimator with exception {e}'

    self.estimator_temp = clone(self.estimator)
    if not self.param.getVal('conformal'):
        return True, results

    # Create the conformal estimator
    try:
        # Conformal regressor
        if self.param.getVal('quantitative'):
            LOG.info("Building conformal Quantitative Keras model")
            underlying_model = RegressorAdapter(self.estimator_temp)
            # Normalize nonconformity scores with a KNN error model.
            normalizing_model = RegressorAdapter(
                KNeighborsRegressor(n_neighbors=15))
            normalizer = RegressorNormalizer(underlying_model,
                                             normalizing_model,
                                             AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
            self.estimator = AggregatedCp(IcpRegressor(nc),
                                          BootstrapSampler())
            self.estimator.fit(X, Y)
            results.append(
                ('model', 'model type', 'conformal Keras quantitative'))
        # Conformal classifier
        else:
            LOG.info("Building conformal Qualitative Keras model")
            self.estimator = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                 MarginErrFunc())),
                BootstrapSampler())
            # Fit estimator to the data
            print('build finished')
            self.estimator.fit(X, Y)
            results.append(
                ('model', 'model type', 'conformal Keras qualitative'))
    except Exception as e:
        # BUG FIX: unreachable return after `raise e` (see above).
        return False, f'Exception building conformal Keras estimator with exception {e}'

    # BUG FIX: the original returned `True, []`, silently discarding the
    # collected results; return them like every other success branch.
    return True, results
def split_data(data, n_train, n_test):
    # Randomly split *data* into two parts whose sizes keep the
    # n_train:n_test ratio (the arguments act as proportions, not counts).
    n_train = n_train * len(data) // (n_train + n_test)
    n_test = len(data) - n_train
    ind = np.random.permutation(len(data))
    return data[ind[:n_train]], data[ind[n_train:n_train + n_test]]


#data = Orange.data.Table("../data/usps.tab")
data = Orange.data.Table("iris")

# Sweep significance levels 0.0 .. 0.4; for each, average the empirical
# error rate and mean prediction-region size over 10 random splits.
for sig in np.linspace(0.0, 0.4, 11):
    errs, szs = [], []
    for rep in range(10):
        #train, test = split_data(data, 7200, 2098)
        train, test = split_data(data, 2, 1)
        train, calib = split_data(train, 2, 1)
        #icp = IcpClassifier(ProbEstClassifierNc(DecisionTreeClassifier(), margin))
        icp = IcpClassifier(ProbEstClassifierNc(LogisticRegression(), margin))
        #icp = ICP()
        icp.fit(train.X, train.Y)
        icp.calibrate(calib.X, calib.Y)
        pred = icp.predict(test.X, significance=sig)
        # NOTE(review): p[y] indexes a region row by the true label; this
        # assumes test.Y holds integer-compatible class indices -- verify.
        acc = sum(p[y] for p, y in zip(pred, test.Y)) / len(pred)
        err = 1 - acc
        # Mean region size: average number of classes included per sample.
        sz = sum(sum(p) for p in pred) / len(pred)
        errs.append(err)
        szs.append(sz)
    print(sig, np.mean(errs), np.mean(szs))
# 'ACP-CrossSampler' : AggregatedCp( # IcpClassifier( # ClassifierNc( # ClassifierAdapter(gbm))), # CrossSampler()) # # 'ACP-BootstrapSampler' : AggregatedCp( # # IcpClassifier( # # ClassifierNc( # # ClassifierAdapter(DecisionTreeClassifier()))), # # BootstrapSampler()), # # 'CCP' : CrossConformalClassifier( # # IcpClassifier( # # ClassifierNc( # # ClassifierAdapter(DecisionTreeClassifier())))), # # 'BCP' : BootstrapConformalClassifier( # # IcpClassifier( # # ClassifierNc( # # ClassifierAdapter(DecisionTreeClassifier())))) # } model = AggregatedCp( IcpClassifier( ClassifierNc( ClassifierAdapter(gbm))), CrossSampler()) model.fit(x_train, y_train) print('predicting') prediction = model.predict(x_test, significance=None) np.savetxt(os.getcwd()+"/prediction/prediction_acp_cross_1.txt", prediction, delimiter=',')
idx = np.random.permutation(data.target.size) train = idx[: int(2 * idx.size / 3)] test = idx[int(2 * idx.size / 3) :] truth = data.target[test].reshape(-1, 1) columns = ["C-{}".format(i) for i in np.unique(data.target)] + ["truth"] significance = 0.1 # ----------------------------------------------------------------------------- # Define models # ----------------------------------------------------------------------------- models = { "ACP-RandomSubSampler": AggregatedCp( IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))), RandomSubSampler(), ), "ACP-CrossSampler": AggregatedCp( IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))), CrossSampler(), ), "ACP-BootstrapSampler": AggregatedCp( IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))), BootstrapSampler(), ), "CCP": CrossConformalClassifier( IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))) ), "BCP": BootstrapConformalClassifier( IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier())))
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc

# Relabel iris targets as consecutive integers starting at 0.
data = load_iris()
x, y = data.data, data.target
for i, y_ in enumerate(np.unique(y)):
    y[y == y_] = i

# Random train / calibration / test split in thirds.
n_instances = y.size
idx = np.random.permutation(n_instances)
third = int(n_instances / 3)
train_idx = idx[:third]
cal_idx = idx[third:2 * third]
test_idx = idx[2 * third:]

# Fit and calibrate an inductive conformal classifier over a random forest.
nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier()))
icp = IcpClassifier(nc)
icp.fit(x[train_idx, :], y[train_idx])
icp.calibrate(x[cal_idx, :], y[cal_idx])

# Show the point prediction with confidence and credibility per test sample.
print(
    pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                 columns=["Label", "Confidence", "Credibility"]))
# ----------------------------------------------------------------------------- # Setup training, calibration and test indices # ----------------------------------------------------------------------------- data = Orange.data.Table('iris') X, y = data.X, data.Y idx = np.random.permutation(y.size) train = idx[:idx.size // 3] calibrate = idx[idx.size // 3:2 * idx.size // 3] test = idx[2 * idx.size // 3:] # ----------------------------------------------------------------------------- # Train and calibrate # ----------------------------------------------------------------------------- icp = IcpClassifier( ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability)) icp.fit(X[train, :], y[train]) icp.calibrate(X[calibrate, :], y[calibrate]) ccp = CrossConformalClassifier( IcpClassifier( ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability))) ccp.fit(X[train, :], y[train]) acp = AggregatedCp( IcpClassifier( ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability)), CrossSampler()) acp.fit(X[train, :], y[train]) # -----------------------------------------------------------------------------
from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc, MarginErrFunc

# -----------------------------------------------------------------------------
# Split iris into train / calibration / test thirds
# -----------------------------------------------------------------------------
data = load_iris()
idx = np.random.permutation(data.target.size)
third = int(idx.size / 3)
train = idx[:third]
calibrate = idx[third:int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# -----------------------------------------------------------------------------
# Fit an ICP over a decision tree with a margin nonconformity function
# -----------------------------------------------------------------------------
icp = IcpClassifier(
    ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()), MarginErrFunc()))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict class regions at 10% significance and tabulate against the truth
# -----------------------------------------------------------------------------
prediction = icp.predict(data.data[test, :], significance=0.1)
header = np.array(["c0", "c1", "c2", "Truth"])
table = np.vstack([prediction.T, data.target[test]]).T
df = pd.DataFrame(np.vstack([header, table]))
print(df)
def build(self):
    '''Build a new RF model with the X and Y numpy matrices

    Returns (True, results) on success or (False, message) on failure.
    '''
    # Make a copy of data matrices
    X = self.X.copy()
    Y = self.Y.copy()
    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))
    results.append(('model', 'model type', 'RF'))

    conformal = self.param.getVal('conformal')

    # If tune then call gridsearch to optimize the estimator
    if self.param.getVal('tune'):
        LOG.info("Optimizing RF estimator")
        try:
            # Check type of model
            if self.param.getVal('quantitative'):
                self.estimator = RandomForestRegressor(
                    **self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                # results.append(('model','model type','RF quantitative (optimized)'))
            else:
                self.estimator = RandomForestClassifier(
                    **self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                # results.append(('model','model type','RF qualitative (optimized)'))
        except Exception as e:
            return False, f'Exception optimizing RF estimator with exception {e}'
    else:
        try:
            if self.param.getVal('quantitative'):
                self.estimator = RandomForestRegressor(
                    **self.estimator_parameters)
                if not conformal:
                    LOG.info("Building Quantitative RF model")
                    # results.append(('model', 'model type', 'RF quantitative'))
            else:
                self.estimator = RandomForestClassifier(
                    **self.estimator_parameters)
                if not conformal:
                    LOG.info("Building Qualitative RF model")
                    # results.append(('model', 'model type', 'RF qualitative'))
            self.estimator.fit(X, Y)
        except Exception as e:
            return False, f'Exception building RF estimator with exception {e}'

    if not conformal:
        return True, results

    # Keep the fitted base estimator; the conformal wrapper below replaces
    # self.estimator but is built around this copy.
    self.estimator_temp = copy(self.estimator)

    # Create the conformal estimator
    try:
        # Conformal regressor
        if self.param.getVal('quantitative'):
            conformal_settings = self.param.getDict('conformal_settings')
            LOG.info("Building conformal Quantitative RF model")

            underlying_model = RegressorAdapter(self.estimator_temp)
            # KNN error model used to normalize nonconformity scores;
            # neighbour count comes from the conformal settings.
            self.normalizing_model = RegressorAdapter(
                KNeighborsRegressor(
                    n_neighbors=conformal_settings['KNN_NN']))
            # normalizing_model = RegressorAdapter(self.estimator_temp)
            # A copy is normalized so self.normalizing_model stays pristine.
            normalizer = RegressorNormalizer(underlying_model,
                                             copy(self.normalizing_model),
                                             AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)

            # self.conformal_pred = AggregatedCp(IcpRegressor
            # (RegressorNc(RegressorAdapter(self.estimator))),
            #                                   BootstrapSampler())
            self.estimator = AggregatedCp(IcpRegressor(nc),
                                          BootstrapSampler())
            self.estimator.fit(X, Y)
            # results.append(('model', 'model type', 'conformal RF quantitative'))

        # Conformal classifier
        else:
            LOG.info("Building conformal Qualitative RF model")
            self.estimator = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                 MarginErrFunc())),
                BootstrapSampler())
            # Fit estimator to the data
            self.estimator.fit(X, Y)
            # results.append(('model', 'model type', 'conformal RF qualitative'))
    except Exception as e:
        return False, f'Exception building conformal RF estimator with exception {e}'

    return True, results

## Overriding of parent methods
# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''
# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''
# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''
# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''
# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''
# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''
# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''
# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''
# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''
from nonconformist.nc import MarginErrFunc
from nonconformist.nc import ClassifierNc, RegressorNc, RegressorNormalizer
from nonconformist.nc import AbsErrorErrFunc, SignErrorErrFunc
from nonconformist.evaluation import cross_val_score
from nonconformist.evaluation import ClassIcpCvHelper, RegIcpCvHelper
from nonconformist.evaluation import class_avg_c, class_mean_errors
from nonconformist.evaluation import reg_mean_errors, reg_median_size

# -----------------------------------------------------------------------------
# Classification
# -----------------------------------------------------------------------------
data = load_iris()

# ICP over a 100-tree random forest with a margin nonconformity function,
# wrapped in the evaluation helper for cross-validation.
icp = IcpClassifier(
    ClassifierNc(ClassifierAdapter(RandomForestClassifier(n_estimators=100)),
                 MarginErrFunc()))
icp_cv = ClassIcpCvHelper(icp)

# 5x5 cross-validated evaluation: mean error rate and average region size
# at three significance levels.
scores = cross_val_score(icp_cv,
                         data.data,
                         data.target,
                         iterations=5,
                         folds=5,
                         scoring_funcs=[class_mean_errors, class_avg_c],
                         significance_levels=[0.05, 0.1, 0.2])

print('Classification: iris')
# Average over folds/iterations, grouped by significance level.
scores = scores.drop(['fold', 'iter'], axis=1)
print(scores.groupby(['significance']).mean())
def test_nc_factory(self):
    """Exercise NcFactory.create_nc across classifier/regressor, OOB and
    normalized variants, printing cross-validated conformal scores."""

    def score_model(icp, icp_name, ds, ds_name, scoring_funcs):
        # Shared driver: 10x10 cross-validation at three significance
        # levels, then print the mean score per significance level.
        scores = cross_val_score(
            icp,
            ds.data,
            ds.target,
            iterations=10,
            folds=10,
            scoring_funcs=scoring_funcs,
            significance_levels=[0.05, 0.1, 0.2],
        )
        print("\n{}: {}".format(icp_name, ds_name))
        scores = scores.drop(["fold", "iter"], axis=1)
        print(scores.groupby(["significance"]).mean())

    # -----------------------------------------------------------------------------
    # Classification
    # -----------------------------------------------------------------------------
    data = load_iris()
    nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100))
    icp = IcpClassifier(nc)
    icp_cv = ClassIcpCvHelper(icp)
    score_model(icp_cv, "IcpClassifier", data, "iris",
                [class_mean_errors, class_avg_c])

    # -----------------------------------------------------------------------------
    # Classification (normalized)
    # -----------------------------------------------------------------------------
    data = load_iris()
    nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100),
                             normalizer_model=KNeighborsRegressor())
    icp = IcpClassifier(nc)
    icp_cv = ClassIcpCvHelper(icp)
    score_model(icp_cv, "IcpClassifier (normalized)", data, "iris",
                [class_mean_errors, class_avg_c])

    # -----------------------------------------------------------------------------
    # Classification OOB
    # -----------------------------------------------------------------------------
    data = load_iris()
    nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100,
                                                    oob_score=True),
                             oob=True)
    # NOTE(review): OOB variants wrap the nc directly, without the
    # ClassIcpCvHelper used above -- presumably the OOB predictor manages
    # its own calibration; verify against the nonconformist API.
    icp_cv = OobCpClassifier(nc)
    score_model(icp_cv, "IcpClassifier (OOB)", data, "iris",
                [class_mean_errors, class_avg_c])

    # -----------------------------------------------------------------------------
    # Classification OOB normalized
    # -----------------------------------------------------------------------------
    data = load_iris()
    nc = NcFactory.create_nc(
        RandomForestClassifier(n_estimators=100, oob_score=True),
        oob=True,
        normalizer_model=KNeighborsRegressor(),
    )
    icp_cv = OobCpClassifier(nc)
    score_model(
        icp_cv,
        "IcpClassifier (OOB, normalized)",
        data,
        "iris",
        [class_mean_errors, class_avg_c],
    )

    # -----------------------------------------------------------------------------
    # Regression
    # -----------------------------------------------------------------------------
    data = load_diabetes()
    nc = NcFactory.create_nc(RandomForestRegressor(n_estimators=100))
    icp = IcpRegressor(nc)
    icp_cv = RegIcpCvHelper(icp)
    score_model(icp_cv, "IcpRegressor", data, "diabetes",
                [reg_mean_errors, reg_median_size])

    # -----------------------------------------------------------------------------
    # Regression (normalized)
    # -----------------------------------------------------------------------------
    data = load_diabetes()
    nc = NcFactory.create_nc(RandomForestRegressor(n_estimators=100),
                             normalizer_model=KNeighborsRegressor())
    icp = IcpRegressor(nc)
    icp_cv = RegIcpCvHelper(icp)
    score_model(
        icp_cv,
        "IcpRegressor (normalized)",
        data,
        "diabetes",
        [reg_mean_errors, reg_median_size],
    )

    # -----------------------------------------------------------------------------
    # Regression OOB
    # -----------------------------------------------------------------------------
    data = load_diabetes()
    nc = NcFactory.create_nc(RandomForestRegressor(n_estimators=100,
                                                   oob_score=True),
                             oob=True)
    icp_cv = OobCpRegressor(nc)
    score_model(icp_cv, "IcpRegressor (OOB)", data, "diabetes",
                [reg_mean_errors, reg_median_size])

    # -----------------------------------------------------------------------------
    # Regression OOB normalized
    # -----------------------------------------------------------------------------
    data = load_diabetes()
    nc = NcFactory.create_nc(
        RandomForestRegressor(n_estimators=100, oob_score=True),
        oob=True,
        normalizer_model=KNeighborsRegressor(),
    )
    icp_cv = OobCpRegressor(nc)
    score_model(
        icp_cv,
        "IcpRegressor (OOB, normalized)",
        data,
        "diabetes",
        [reg_mean_errors, reg_median_size],
    )
def test_acp_classification_tree(self): # ----------------------------------------------------------------------------- # Experiment setup # ----------------------------------------------------------------------------- data = load_iris() idx = np.random.permutation(data.target.size) train = idx[:int(2 * idx.size / 3)] test = idx[int(2 * idx.size / 3):] truth = data.target[test].reshape(-1, 1) columns = ["C-{}".format(i) for i in np.unique(data.target)] + ["truth"] significance = 0.1 # ----------------------------------------------------------------------------- # Define models # ----------------------------------------------------------------------------- models = { "ACP-RandomSubSampler": AggregatedCp( IcpClassifier( ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))), RandomSubSampler(), ), "ACP-CrossSampler": AggregatedCp( IcpClassifier( ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))), CrossSampler(), ), "ACP-BootstrapSampler": AggregatedCp( IcpClassifier( ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))), BootstrapSampler(), ), "CCP": CrossConformalClassifier( IcpClassifier( ClassifierNc(ClassifierAdapter( DecisionTreeClassifier())))), "BCP": BootstrapConformalClassifier( IcpClassifier( ClassifierNc(ClassifierAdapter( DecisionTreeClassifier())))), } # ----------------------------------------------------------------------------- # Train, predict and evaluate # ----------------------------------------------------------------------------- for name, model in models.items(): model.fit(data.data[train, :], data.target[train]) prediction = model.predict(data.data[test, :], significance=significance) table = np.hstack((prediction, truth)) df = pd.DataFrame(table, columns=columns) print("\n{}".format(name)) print("Error rate: {}".format( class_mean_errors(prediction, truth, significance))) print(df) self.assertTrue(True)
train = data.drop(['id'], axis=1) train = train.drop(['target'], axis=1).values part1 = int(0.7 * len(train)) for xx in range(1, nmodels + 1): modelfile2 = infile + "_nonconf" + "_" + str(xx) + ".model" print("Working on model", xx) idx = np.random.permutation(int(len(train))) print(idx) trainset = idx[:part1] calset = idx[part1:] nc = ProbEstClassifierNc(RandomForestClassifier, margin, model_params={'n_estimators': 100}) icp_norm = IcpClassifier(nc, condition=lambda instance: instance[1]) icp_norm.fit(train[trainset], target[trainset]) icp_norm.calibrate(train[calset], target[calset]) cloudpickle.dump(icp_norm, f) f.close() if mode != 't': outfile = predfile + "_nonconf_pred100sum.csv" f2 = open(outfile, 'w') f2.write('id\tp-value_low_class\tp-value_high_class\tclass\tmodel\n') f2.close() data = pd.read_csv(predfile, sep='\t', header=0, index_col=None) data.loc[data['target'] < 0, 'target'] = 0 labels = data['id']
idx = np.random.permutation(data.target.size) train = idx[:int(2 * idx.size / 3)] test = idx[int(2 * idx.size / 3):] truth = data.target[test].reshape(-1, 1) columns = ['C-{}'.format(i) for i in np.unique(data.target)] + ['truth'] significance = 0.1 # ----------------------------------------------------------------------------- # Define models # ----------------------------------------------------------------------------- models = { 'ACP-RandomSubSampler': AggregatedCp( IcpClassifier(ClassifierNc(ClassifierAdapter( DecisionTreeClassifier()))), RandomSubSampler()), 'ACP-CrossSampler': AggregatedCp( IcpClassifier(ClassifierNc(ClassifierAdapter( DecisionTreeClassifier()))), CrossSampler()), 'ACP-BootstrapSampler': AggregatedCp( IcpClassifier(ClassifierNc(ClassifierAdapter( DecisionTreeClassifier()))), BootstrapSampler()), 'CCP': CrossConformalClassifier( IcpClassifier(ClassifierNc(ClassifierAdapter( DecisionTreeClassifier())))), 'BCP': BootstrapConformalClassifier( IcpClassifier(ClassifierNc(ClassifierAdapter(
def build(self):
    '''Build a new RF model with the X and Y numpy matrices.

    Returns False when the model was already flagged as failed,
    otherwise (True, results) where results is a list of
    (key, description, value) tuples describing the built model.
    '''
    # Abort early when a previous pipeline step flagged this model.
    if self.failed:
        return False

    # Work on copies so the stored matrices are never mutated.
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    # Replace the plain CV specification with the project cross-validator.
    if self.cv:
        self.cv = getCrossVal(self.cv,
                              self.estimator_parameters["random_state"],
                              self.n, self.p)

    if self.tune:
        # Grid-search the RF hyper-parameters before fitting.
        if self.quantitative:
            self.optimize(X, Y, RandomForestRegressor(),
                          self.tune_parameters)
            results.append(
                ('model', 'model type', 'RF quantitative (optimized)'))
        else:
            self.optimize(X, Y, RandomForestClassifier(),
                          self.tune_parameters)
            results.append(
                ('model', 'model type', 'RF qualitative (optimized)'))
    else:
        if self.quantitative:
            log.info("Building Quantitative RF model")
            # 'class_weight' only applies to classifiers; drop it.
            self.estimator_parameters.pop('class_weight', None)
            self.estimator = RandomForestRegressor(
                **self.estimator_parameters)
            results.append(('model', 'model type', 'RF quantitative'))
        else:
            log.info("Building Qualitative RF model")
            self.estimator = RandomForestClassifier(
                **self.estimator_parameters)
            results.append(('model', 'model type', 'RF qualitative'))

    if self.conformal:
        if self.quantitative:
            # Normalized nonconformity: the RF estimator itself is reused
            # as the normalizing model (the KNN alternative is kept below,
            # commented out, to match the SVM builder).
            # FIX: removed a dead KNN assignment that was immediately
            # overwritten by the next line.
            underlying_model = RegressorAdapter(self.estimator)
            # normalizing_model = RegressorAdapter(
            #     KNeighborsRegressor(n_neighbors=5))
            normalizing_model = RegressorAdapter(self.estimator)
            normalizer = RegressorNormalizer(underlying_model,
                                             normalizing_model,
                                             AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                             normalizer)
            # self.conformal_pred = AggregatedCp(
            #     IcpRegressor(RegressorNc(RegressorAdapter(self.estimator))),
            #     BootstrapSampler())
            self.conformal_pred = AggregatedCp(IcpRegressor(nc),
                                               BootstrapSampler())
            self.conformal_pred.fit(X, Y)
            # overrides non-conformal
            results.append(
                ('model', 'model type', 'conformal RF quantitative'))
        else:
            self.conformal_pred = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator),
                                 MarginErrFunc())),
                BootstrapSampler())
            self.conformal_pred.fit(X, Y)
            # overrides non-conformal
            results.append(
                ('model', 'model type', 'conformal RF qualitative'))

    # Fit the plain estimator last, on the full data.
    self.estimator.fit(X, Y)

    return True, results

#### Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''
def build(self):
    '''Build a new SVM model with the X and Y numpy matrices.

    Returns (True, results) where results is a list of
    (key, description, value) tuples describing the built model.
    '''
    # Work on copies so the stored matrices are never mutated.
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    # If tune then call gridsearch to optimize the estimator
    if self.param.getVal('tune'):
        try:
            # Check type of model
            if self.param.getVal('quantitative'):
                self.optimize(X, Y, svm.SVR(**self.estimator_parameters),
                              self.tune_parameters)
                results.append(('model', 'model type',
                                'SVM quantitative (optimized)'))
            else:
                self.optimize(X, Y, svm.SVC(**self.estimator_parameters),
                              self.tune_parameters)
                results.append(
                    ('model', 'model type', 'SVM qualitative (optimized)'))
            LOG.debug('SVM estimator optimized')
        except Exception as e:
            # FIX: added the missing space between the f-string fragments
            # (previously logged "SVMestimator").
            LOG.error(f'Exception optimizing SVM '
                      f'estimator with exception {e}')
    else:
        try:
            LOG.info("Building SVM model")
            if self.param.getVal('quantitative'):
                LOG.info("Building Quantitative SVM-R model")
                self.estimator = svm.SVR(**self.estimator_parameters)
                results.append(('model', 'model type', 'SVM quantitative'))
            else:
                self.estimator = svm.SVC(**self.estimator_parameters)
                results.append(('model', 'model type', 'SVM qualitative'))
        except Exception as e:
            # FIX: added the missing space between the f-string fragments.
            LOG.error(f'Exception building SVM '
                      f'estimator with exception {e}')

    # NOTE(review): if optimization/building raised above, the exception
    # was only logged and self.estimator may be unset here, so this fit
    # would raise — confirm whether an early return was intended.
    self.estimator.fit(X, Y)
    # Keep a copy of the plain estimator; the conformal wrapper below
    # replaces self.estimator.
    self.estimator_temp = copy(self.estimator)

    if self.param.getVal('conformal'):
        try:
            LOG.info("Building aggregated conformal SVM model")
            if self.param.getVal('quantitative'):
                # Normalized nonconformity: the SVM itself is reused as
                # the normalizing model.
                underlying_model = RegressorAdapter(self.estimator_temp)
                # normalizing_model = RegressorAdapter(
                #     KNeighborsRegressor(n_neighbors=5))
                normalizing_model = RegressorAdapter(self.estimator_temp)
                normalizer = RegressorNormalizer(underlying_model,
                                                 normalizing_model,
                                                 AbsErrorErrFunc())
                nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                 normalizer)
                # self.conformal_pred = AggregatedCp(IcpRegressor(
                #     RegressorNc(RegressorAdapter(self.estimator))),
                #     BootstrapSampler())
                self.estimator = AggregatedCp(IcpRegressor(nc),
                                              BootstrapSampler())
                self.estimator.fit(X, Y)
                # overrides non-conformal
                results.append(
                    ('model', 'model type', 'conformal SVM quantitative'))
            else:
                self.estimator = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(
                            ClassifierAdapter(self.estimator_temp),
                            MarginErrFunc())),
                    BootstrapSampler())
                self.estimator.fit(X, Y)
                # overrides non-conformal
                results.append(
                    ('model', 'model type', 'conformal SVM qualitative'))
        except Exception as e:
            LOG.error(f'Exception building aggregated conformal SVM '
                      f'estimator with exception {e}')

    return True, results
def test_cross_validation(self):
    """Cross-validate ICP setups: classification plus four regression
    nonconformity variants (absolute/signed error, raw/normalized)."""

    def _report(helper, dataset, title, metrics):
        # 5x5-fold conformal cross-validation; print the mean of each
        # metric per significance level.
        frame = cross_val_score(
            helper,
            dataset.data,
            dataset.target,
            iterations=5,
            folds=5,
            scoring_funcs=metrics,
            significance_levels=[0.05, 0.1, 0.2],
        )
        print(title)
        frame = frame.drop(["fold", "iter"], axis=1)
        print(frame.groupby(["significance"]).mean())

    clf_metrics = [class_mean_errors, class_avg_c]
    reg_metrics = [reg_mean_errors, reg_median_size]

    # -- Classification --------------------------------------------------
    iris = load_iris()
    _report(
        ClassIcpCvHelper(IcpClassifier(ClassifierNc(
            ClassifierAdapter(RandomForestClassifier(n_estimators=100)),
            MarginErrFunc()))),
        iris, "Classification: iris", clf_metrics)

    # -- Regression, absolute error --------------------------------------
    diabetes = load_diabetes()
    _report(
        RegIcpCvHelper(IcpRegressor(RegressorNc(
            RegressorAdapter(RandomForestRegressor(n_estimators=100)),
            AbsErrorErrFunc()))),
        diabetes, "Absolute error regression: diabetes", reg_metrics)

    # -- Regression, normalized absolute error ---------------------------
    base = RegressorAdapter(RandomForestRegressor(n_estimators=100))
    scaler = RegressorAdapter(RandomForestRegressor(n_estimators=100))
    norm = RegressorNormalizer(base, scaler, AbsErrorErrFunc())
    _report(
        RegIcpCvHelper(IcpRegressor(
            RegressorNc(base, AbsErrorErrFunc(), norm))),
        diabetes, "Normalized absolute error regression: diabetes",
        reg_metrics)

    # -- Regression, signed error ----------------------------------------
    _report(
        RegIcpCvHelper(IcpRegressor(RegressorNc(
            RegressorAdapter(RandomForestRegressor(n_estimators=100)),
            SignErrorErrFunc()))),
        diabetes, "Signed error regression: diabetes", reg_metrics)

    # -- Regression, normalized signed error -----------------------------
    base = RegressorAdapter(RandomForestRegressor(n_estimators=100))
    scaler = RegressorAdapter(RandomForestRegressor(n_estimators=100))
    # The normalization model can use a different error function than is
    # used to measure errors on the underlying model.
    norm = RegressorNormalizer(base, scaler, AbsErrorErrFunc())
    _report(
        RegIcpCvHelper(IcpRegressor(
            RegressorNc(base, SignErrorErrFunc(), norm))),
        diabetes, "Normalized signed error regression: diabetes",
        reg_metrics)
def CF_qualitative_validation(self):
    '''Performs validation for conformal qualitative models.

    Runs a 5-fold cross-validation of an aggregated conformal
    classifier built around self.estimator_temp, accumulates a
    confusion matrix over single-class predictions, and computes
    sensitivity, specificity, MCC, conformal coverage and accuracy.
    Returns (True, {'quality': info}) where info is a list of
    (key, description, value) tuples.
    '''
    # Make a copy of original matrices.
    X = self.X.copy()
    Y = self.Y.copy()

    # Cross-validated confusion-matrix counters.
    c0_correct_all = 0      # class 0 predicted as class 0 (TN)
    c0_incorrect_all = 0    # class 0 predicted as class 1 (FP)
    c1_correct_all = 0      # class 1 predicted as class 1 (TP)
    c1_incorrect_all = 0    # class 1 predicted as class 0 (FN)
    not_predicted_all = 0   # ambiguous predictions (outside the AD)

    info = []
    kf = KFold(n_splits=5, shuffle=True, random_state=46)

    # Copy Y vector to use it as template to assign predictions
    Y_pred = copy.copy(Y).tolist()
    try:
        for train_index, test_index in kf.split(X):
            # Generate training and test sets
            X_train, X_test = X[train_index], X[test_index]
            Y_train, Y_test = Y[train_index], Y[test_index]
            # Create the aggregated conformal classifier.
            conformal_pred = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                 MarginErrFunc())),
                BootstrapSampler())
            # Fit the conformal classifier to the data
            conformal_pred.fit(X_train, Y_train)
            # Perform prediction on test set
            prediction = conformal_pred.predict(
                X_test, self.param.getVal('conformalSignificance'))
            # Assign each prediction to its original position.
            for index, el in enumerate(test_index):
                Y_pred[el] = prediction[index]

        # Iterate over the prediction and check the result.  A prediction
        # region containing a single class counts toward the confusion
        # matrix; an ambiguous {0, 1} region counts as "not predicted".
        for i in range(len(Y_pred)):
            real = float(Y[i])
            predicted = Y_pred[i]
            if predicted[0] != predicted[1]:
                if real == 0 and predicted[0] == True:
                    c0_correct_all += 1
                if real == 0 and predicted[1] == True:
                    c0_incorrect_all += 1
                if real == 1 and predicted[1] == True:
                    c1_correct_all += 1
                if real == 1 and predicted[0] == True:
                    c1_incorrect_all += 1
            else:
                not_predicted_all += 1
    except Exception as e:
        LOG.error(f'Qualitative conformal validation'
                  f' failed with exception: {e}')
        raise e

    # Store the aggregated confusion matrix.
    # FIX: removed a no-op self-assignment of not_predicted_all.
    self.TN = c0_correct_all
    self.FP = c0_incorrect_all
    self.TP = c1_correct_all
    self.FN = c1_incorrect_all

    info.append(('TP', 'True positives in cross-validation', self.TP))
    info.append(('TN', 'True negatives in cross-validation', self.TN))
    info.append(('FP', 'False positives in cross-validation', self.FP))
    info.append(('FN', 'False negatives in cross-validation', self.FN))

    # Compute sensitivity, specificity and MCC; a zero denominator is
    # reported as '-' instead of propagating the exception.
    try:
        self.sensitivity = (self.TP / (self.TP + self.FN))
    except Exception as e:
        # FIX: 'sensibility' -> 'sensitivity' and added the missing space
        # between the f-string fragments.
        LOG.error(f'Failed to compute sensitivity with '
                  f'exception {e}')
        self.sensitivity = '-'
    try:
        self.specificity = (self.TN / (self.TN + self.FP))
    except Exception as e:
        LOG.error(f'Failed to compute specificity with '
                  f'exception {e}')
        self.specificity = '-'
    try:
        # Compute Matthews Correlation Coefficient
        self.mcc = (((self.TP * self.TN) - (self.FP * self.FN)) /
                    np.sqrt((self.TP + self.FP) * (self.TP + self.FN) *
                            (self.TN + self.FP) * (self.TN + self.FN)))
    except Exception as e:
        # FIX: 'Mathews' -> 'Matthews' and added the missing space.
        LOG.error(f'Failed to compute Matthews Correlation Coefficient '
                  f'exception {e}')
        self.mcc = '-'

    info.append(('Sensitivity', 'Sensitivity in cross-validation',
                 self.sensitivity))
    info.append(('Specificity', 'Specificity in cross-validation',
                 self.specificity))
    info.append(
        ('MCC', 'Matthews Correlation Coefficient in cross-validation',
         self.mcc))

    try:
        # Compute coverage (% of compounds inside the applicability domain)
        self.conformal_coverage = (
            self.TN + self.FP + self.TP + self.FN) / (
            (self.TN + self.FP + self.TP + self.FN) + not_predicted_all)
    except Exception as e:
        LOG.error(f'Failed to compute conformal coverage with '
                  f'exception {e}')
        self.conformal_coverage = '-'

    try:
        # Compute accuracy (% of correct predictions)
        self.conformal_accuracy = (
            float(self.TN + self.TP) /
            float(self.FP + self.FN + self.TN + self.TP))
    except Exception as e:
        LOG.error(f'Failed to compute conformal accuracy with '
                  f'exception {e}')
        self.conformal_accuracy = '-'

    info.append(('Conformal_coverage', 'Conformal coverage',
                 self.conformal_coverage))
    info.append(('Conformal_accuracy', 'Conformal accuracy',
                 self.conformal_accuracy))

    results = {}
    results['quality'] = info
    # results['classes'] = prediction
    return True, results
classification_method = DecisionTreeClassifier() file_name = 'decision_tree.xls' ACP_Random = [] ACP_Cross = [] ACP_Boot = [] CCP = [] BCP = [] # ----------------------------------------------------------------------------- # Define models # ----------------------------------------------------------------------------- models = { 'ACP-RandomSubSampler': AggregatedCp( IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))), RandomSubSampler()), 'ACP-CrossSampler': AggregatedCp( IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))), CrossSampler()), 'ACP-BootstrapSampler': AggregatedCp( IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))), BootstrapSampler()), 'CCP': CrossConformalClassifier( IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method)))), 'BCP': BootstrapConformalClassifier( IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))))
scoring_funcs=scoring_funcs, significance_levels=[0.05, 0.1, 0.2], ) print("\n{}: {}".format(icp_name, ds_name)) scores = scores.drop(["fold", "iter"], axis=1) print(scores.groupby(["significance"]).mean()) # ----------------------------------------------------------------------------- # Classification # ----------------------------------------------------------------------------- data = load_iris() nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100)) icp = IcpClassifier(nc) icp_cv = ClassIcpCvHelper(icp) score_model(icp_cv, "IcpClassifier", data, "iris", [class_mean_errors, class_avg_c]) # ----------------------------------------------------------------------------- # Classification (normalized) # ----------------------------------------------------------------------------- data = load_iris() nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100), normalizer_model=KNeighborsRegressor()) icp = IcpClassifier(nc) icp_cv = ClassIcpCvHelper(icp) score_model(icp_cv, "IcpClassifier (normalized)", data, "iris",
import pandas as pd from sklearn.ensemble import RandomForestClassifier from sklearn.datasets import load_iris from nonconformist.base import ClassifierAdapter from nonconformist.icp import IcpClassifier from nonconformist.nc import ClassifierNc data = load_iris() x, y = data.data, data.target for i, y_ in enumerate(np.unique(y)): y[y == y_] = i n_instances = y.size idx = np.random.permutation(n_instances) train_idx = idx[:int(n_instances / 3)] cal_idx = idx[int(n_instances / 3):2 * int(n_instances / 3)] test_idx = idx[2 * int(n_instances / 3):] nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier())) icp = IcpClassifier(nc) icp.fit(x[train_idx, :], y[train_idx]) icp.calibrate(x[cal_idx, :], y[cal_idx]) print(pd.DataFrame(icp.predict_conf(x[test_idx, :]), columns=['Label', 'Confidence', 'Credibility']))
def ccp_predict(self, data_lbld, data_unlbld, new_lbld): # Create SMOTE instance for class rebalancing smote = SMOTE(random_state=self.random_state) # Create instance of classifier classifier_y = self.classifiers['classifier_y'] parameters_y = self.clf_parameters['classifier_y'] clf = classifier_y.set_params(**parameters_y) X = data_lbld.iloc[:, :-2] y = data_lbld.iloc[:, -1] X_new = new_lbld.iloc[:, :-2] y_new = new_lbld.iloc[:, -1] X = X.append(X_new, sort=False) y = y.append(y_new) X_unlbld = data_unlbld.iloc[:, :-2] sss = StratifiedKFold(n_splits=5, random_state=self.random_state) sss.get_n_splits(X, y) p_values = [] for train_index, calib_index in sss.split(X, y): X_train, X_calib = X.iloc[train_index], X.iloc[calib_index] y_train, y_calib = y.iloc[train_index], y.iloc[calib_index] if self.rebalancing_parameters['SMOTE_y']: X_train, y_train = smote.fit_resample(X_train, y_train) clf.fit(X_train[:, :-1], y_train, sample_weight=X_train[:, -1]) else: clf.fit(X_train.iloc[:, :-1], y_train, sample_weight=X_train.iloc[:, -1]) nc = NcFactory.create_nc(clf, MarginErrFunc()) icp = IcpClassifier(nc) if self.rebalancing_parameters['SMOTE_y']: icp.fit(X_train[:, :-1], y_train) else: icp.fit(X_train.iloc[:, :-1].values, y_train) icp.calibrate(X_calib.iloc[:, :-1].values, y_calib) # Predict confidences for validation sample and unlabeled sample p_values.append( icp.predict(X_unlbld.iloc[:, :-1].values, significance=None)) mean_p_values = np.array(p_values).mean(axis=0) ccp_predictions = pd.DataFrame(mean_p_values, columns=['mean_p_0', 'mean_p_1']) ccp_predictions["credibility"] = [ row.max() for _, row in ccp_predictions.iterrows() ] ccp_predictions["confidence"] = [ 1 - row.min() for _, row in ccp_predictions.iterrows() ] ccp_predictions.index = X_unlbld.index return ccp_predictions
def CF_qualitative_validation(self):
    '''Performs validation for conformal qualitative models.

    Repeats a 75/25 train/test split three times, fits an aggregated
    conformal classifier on each training set and averages the resulting
    confusion matrices; computes sensitivity, specificity, MCC,
    conformal coverage and accuracy.  Returns (True, (results,)) where
    results is a list of (key, description, value) tuples.
    '''
    # Make a copy of original matrices.
    X = self.X.copy()
    Y = self.Y.copy()

    # Number of external validations for the
    # aggregated conformal estimator.
    # NOTE(review): only len(seeds) is used below — train_test_split
    # receives the loop index as random_state, never seeds[i]. Confirm
    # whether the seed values were meant to be used.
    seeds = [5, 7, 35]
    # Per-repetition confusion-matrix counts.
    c0_correct_all = []      # class 0 predicted correctly (TN)
    c0_incorrect_all = []    # class 0 predicted as class 1 (FP)
    c1_correct_all = []      # class 1 predicted correctly (TP)
    c1_incorrect_all = []    # class 1 predicted as class 0 (FN)
    not_predicted_all = []   # ambiguous predictions (outside the AD)
    results = []

    # Iterate over the seeds.
    try:
        for i in range(len(seeds)):
            # Generate training and test sets
            X_train, X_test,\
                Y_train, Y_test = train_test_split(X, Y, test_size=0.25,
                                                   random_state=i,
                                                   shuffle=True)
            # Create the aggregated conformal classifier.
            conformal_pred = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator),
                                 MarginErrFunc())),
                BootstrapSampler())
            # Fit the conformal classifier to the data
            conformal_pred.fit(X_train, Y_train)
            # Perform prediction on test set
            prediction = conformal_pred.predict(X_test,
                                                self.conformalSignificance)

            c0_correct = 0
            c1_correct = 0
            not_predicted = 0
            c0_incorrect = 0
            c1_incorrect = 0
            # Iterate over the prediction and check the result.  A region
            # with a single class counts toward the confusion matrix; an
            # ambiguous {0, 1} region counts as "not predicted".
            # FIX: inner loop index renamed so it no longer shadows the
            # outer loop variable `i`.
            for j in range(len(Y_test)):
                real = float(Y_test[j])
                predicted = prediction[j]
                if predicted[0] != predicted[1]:
                    if real == 0 and predicted[0] == True:
                        c0_correct += 1
                    if real == 0 and predicted[1] == True:
                        c0_incorrect += 1
                    if real == 1 and predicted[1] == True:
                        c1_correct += 1
                    if real == 1 and predicted[0] == True:
                        c1_incorrect += 1
                else:
                    not_predicted += 1
            # Add the results to the lists.
            c0_correct_all.append(c0_correct)
            c0_incorrect_all.append(c0_incorrect)
            c1_correct_all.append(c1_correct)
            c1_incorrect_all.append(c1_incorrect)
            not_predicted_all.append(not_predicted)
    except Exception as e:
        LOG.error(f'Qualitative conformal validation'
                  f' failed with exception: {e}')
        raise e

    # Get the mean confusion matrix.
    # FIX: np.int was removed from NumPy (1.24) — it was an alias of the
    # builtin int, so this is behavior-identical.
    self.TN = int(np.mean(c0_correct_all))
    self.FP = int(np.mean(c0_incorrect_all))
    self.TP = int(np.mean(c1_correct_all))
    self.FN = int(np.mean(c1_incorrect_all))
    not_predicted_all = int(np.mean(not_predicted_all))

    results.append(('TP', 'True positives in cross-validation', self.TP))
    results.append(('TN', 'True negatives in cross-validation', self.TN))
    results.append(('FP', 'False positives in cross-validation', self.FP))
    results.append(('FN', 'False negatives in cross-validation', self.FN))

    # Compute sensitivity and specificity
    # NOTE(review): these raise ZeroDivisionError when a class is absent
    # from every test split — confirm callers expect the exception.
    self.sensitivity = (self.TP / (self.TP + self.FN))
    self.specificity = (self.TN / (self.TN + self.FP))
    # Compute Matthews Correlation Coefficient
    self.mcc = (((self.TP * self.TN) - (self.FP * self.FN)) /
                np.sqrt((self.TP + self.FP) * (self.TP + self.FN) *
                        (self.TN + self.FP) * (self.TN + self.FN)))

    results.append(('Sensitivity', 'Sensitivity in cross-validation',
                    self.sensitivity))
    results.append(('Specificity', 'Specificity in cross-validation',
                    self.specificity))
    results.append(
        ('MCC', 'Matthews Correlation Coefficient in cross-validation',
         self.mcc))

    # Compute coverage (% of compounds inside the applicability domain)
    self.conformal_coverage = (self.TN + self.FP + self.TP + self.FN) / (
        (self.TN + self.FP + self.TP + self.FN) + not_predicted_all)
    # Compute accuracy (% of correct predictions)
    self.conformal_accuracy = float(self.TN + self.TP) / float(
        self.FP + self.FN + self.TN + self.TP)

    results.append(('Conformal_coverage', 'Conformal coverage',
                    self.conformal_coverage))
    results.append(('Conformal_accuracy', 'Conformal accuracy',
                    self.conformal_accuracy))

    return True, (results, )