def CF_QuanVal(X, Y, estimator, conformalSignificance):
    """Fit an aggregated conformal regressor and print prediction diagnostics.

    The first 30 rows of ``X``/``Y`` are used for fitting. The remaining rows
    are predicted twice (once without a significance level, once at 0.25) and
    the outcomes are printed to stdout.

    NOTE(review): ``conformalSignificance`` is accepted but never read; the
    significance level is fixed at 0.25 -- confirm whether that is intended.

    Returns the fitted aggregated conformal predictor.
    """
    print("Starting quantitative conformal prediction validation")

    nonconformity = RegressorNc(RegressorAdapter(estimator))
    icp = AggregatedCp(IcpRegressor(nonconformity), BootstrapSampler())

    # Alternative set-ups kept from the original for reference:
    # icp = AggregatedCp(IcpRegressor(RegressorNc(RegressorAdapter(estimator),
    #       AbsErrorErrFunc(), RegressorNormalizer(estimator,
    #       RegressorAdapter(copy.copy(estimator)), AbsErrorErrFunc()))))
    # icp_cv = RegIcpCvHelper(icp)
    # scores = conformal_cross_val_score(icp_cv,
    #                                    X,
    #                                    Y,
    #                                    iterations=5,
    #                                    folds=5,
    #                                    scoring_funcs=[reg_mean_errors, reg_median_size, reg_mean_size],
    #                                    significance_levels=[0.05, 0.1, 0.2, conformalSignificance])

    fit_rows = slice(None, 30)
    holdout_rows = slice(30, None)

    icp.fit(X[fit_rows], Y[fit_rows])
    prediction = icp.predict(X[holdout_rows])
    prediction_sign = icp.predict(X[holdout_rows], significance=0.25)

    # Column 0 minus column 1 of the interval prediction, exactly as the
    # values have always been reported by this helper.
    first_bound = prediction_sign[:, 0]
    second_bound = prediction_sign[:, 1]
    interval = first_bound - second_bound

    print(np.mean(interval))
    print(interval)
    print("\n")
    print(prediction)
    print(prediction_sign)
    return icp
def build(self):
    '''Build a qualitative Gaussian Naive Bayes model from self.X / self.Y.

    When the 'conformal' parameter is set, the fitted GNB estimator is
    copied into self.estimator_temp and self.estimator is replaced by an
    aggregated conformal classifier fitted on the same data.

    Returns (True, results), where results is a list of
    (key, label, value) tuples describing the model.
    '''
    # Work on private copies of the data matrices
    x_matrix = self.X.copy()
    y_vector = self.Y.copy()

    results = [
        ('nobj', 'number of objects', self.nobj),
        ('nvarx', 'number of predictor variables', self.nvarx),
    ]

    # Plain estimator
    LOG.info('Building GaussianNB model')
    self.estimator = GaussianNB(**self.estimator_parameters)
    results.append(('model', 'model type', 'GNB qualitative'))
    self.estimator.fit(x_matrix, y_vector)

    wants_conformal = self.param.getVal('conformal')
    if not wants_conformal:
        return True, results

    # Conformal: wrap a copy of the fitted estimator
    self.estimator_temp = copy(self.estimator)
    nonconformity = ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                 MarginErrFunc())
    self.estimator = AggregatedCp(IcpClassifier(nonconformity),
                                  BootstrapSampler())

    # Fit the aggregated conformal classifier to the data
    self.estimator.fit(x_matrix, y_vector)
    results.append(('model', 'model type', 'conformal GNB qualitative'))
    return True, results
def CF_QualVal(X, Y, estimator, conformalSignificance):
    """Leave-one-out validation of an aggregated conformal classifier.

    For every leave-one-out split the training rows are centred and scaled
    with the ``center``/``scale`` helpers, the predictor is refitted and the
    held-out row is predicted at significance 0.15. The error rate and the
    ``class_one_c`` score are printed.

    NOTE(review): ``conformalSignificance`` is never read (0.15 is fixed) and
    the held-out row is predicted without applying the centring/scaling
    learnt on the training rows -- confirm both are intended.

    Returns the conformal predictor as fitted on the last split.
    """
    print("Starting qualitative conformal prediction validation")

    nonconformity = ClassifierNc(ClassifierAdapter(estimator), MarginErrFunc())
    icp = AggregatedCp(IcpClassifier(nonconformity), BootstrapSampler())

    Y = np.asarray(Y).reshape(-1, 1)
    splitter = LeaveOneOut()

    per_split = []
    for train_idx, test_idx in splitter.split(X):
        x_train = [X[row] for row in train_idx]
        y_train = [Y[row] for row in train_idx]

        # Centre first, then scale; the returned statistics are not reused.
        x_train, mux = center(x_train)
        x_train, wgx = scale(x_train, True)
        y_train = np.asarray(y_train)

        x_out = X[test_idx]
        y_out = Y[test_idx[0]]  # not used further

        icp.fit(x_train, y_train)
        per_split.append(icp.predict(x_out, significance=0.15))

    # Each split predicts one row; keep that row only.
    predictions = np.asarray([(block[0]).tolist() for block in per_split])

    table = np.hstack((predictions, Y))  # not used further

    error_rate = class_mean_errors(predictions, Y, 0.15)
    print('Error rate: {}'.format(error_rate))
    print('Class one: ', class_one_c(predictions, Y, 0.15))
    return icp
def CF_quantitative_validation(self):
    ''' Performs internal validation for conformal quantitative models

    An aggregated conformal regressor is built once per entry in ``seeds``
    on a 75/25 split of the data; the mean prediction-interval width and
    the fraction of test values falling strictly inside their interval are
    averaged over the repetitions.

    Sets self.conformal_accuracy and self.conformal_mean_interval (both
    rounded to two decimals).

    Returns (True, (results, )) where results is a list of
    (key, label, value) tuples. Any exception raised while fitting or
    predicting is logged and re-raised.
    '''
    # Make a copy of original matrices.
    X = self.X.copy()
    Y = self.Y.copy()

    # One external validation per seed.
    seeds = [5, 7, 35]
    # Mean interval width of each repetition.
    interval_means = []
    # Accuracy of each repetition.
    accuracies = []
    results = []

    try:
        for seed in seeds:
            # NOTE(review): with shuffle=False scikit-learn does not shuffle,
            # so random_state has no influence and every repetition uses the
            # same split -- confirm whether shuffle=True was intended.
            X_train, X_test, Y_train, Y_test = train_test_split(
                X, Y, test_size=0.25, random_state=seed, shuffle=False)

            # Create the aggregated conformal regressor.
            conformal_pred = AggregatedCp(
                IcpRegressor(RegressorNc(RegressorAdapter(self.estimator))),
                BootstrapSampler())

            # Fit conformal regressor to the data
            conformal_pred.fit(X_train, Y_train)

            # Perform prediction on test set
            prediction = conformal_pred.predict(X_test,
                                                self.conformalSignificance)
            lower = prediction[:, 0]
            upper = prediction[:, 1]

            # Interval width is |upper - lower|. The previous
            # |lower| - |upper| is not a width: it turns negative or
            # collapses whenever the bounds differ in sign or magnitude order.
            interval_means.append(np.mean(np.abs(upper - lower)))

            Y_test = Y_test.reshape(-1, 1)
            # Boolean mask of instances strictly inside their interval.
            inside_interval = ((lower.reshape(-1, 1) < Y_test) &
                               (upper.reshape(-1, 1) > Y_test))
            # Fraction of test instances inside the interval.
            accuracies.append(np.sum(inside_interval) / len(Y_test))
    except Exception as e:
        LOG.error(f'Quantitative conformal validation'
                  f' failed with exception: {e}')
        raise

    # Average over the repetitions.
    interval_means = np.mean(interval_means)
    accuracies = np.mean(accuracies)

    # Cut into two decimals.
    self.conformal_accuracy = float("{0:.2f}".format(accuracies))
    self.conformal_mean_interval = float("{0:.2f}".format(interval_means))

    # Add quality metrics to results.
    results.append(('Conformal_mean_interval', 'Conformal mean interval',
                    self.conformal_mean_interval))
    results.append(('Conformal_accuracy', 'Conformal accuracy',
                    self.conformal_accuracy))
    return True, (results, )
def build(self):
    '''Build a quantitative PLSR model from self.X / self.Y.

    Returns (False, message) when the model is not quantitative or failed
    to initialise. Otherwise the estimator is created (optionally through
    the optimizer when self.tune is set), an aggregated conformal regressor
    is built into self.conformal_pred when self.conformal is set, the plain
    estimator is fitted and (True, results) is returned, results being a
    list of (key, label, value) tuples.
    '''
    if not self.quantitative:
        print("PLSR only applies to quantitative data")
        return False, "PLSR only applies to quantitative data"

    if self.failed:
        return False, "Error initiating model"

    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    if self.cv:
        self.cv = getCrossVal(self.cv, 46, self.n, self.p)

    if self.tune:
        if self.optimiz == 'auto':
            # Parent-class optimizer
            super(PLSR, self).optimize(X, Y, PLS_r(
                **self.estimator_parameters), self.tune_parameters)
        elif self.optimiz == 'manual':
            # Optimizer defined on this class
            self.optimize(X, Y, PLS_r(
                **self.estimator_parameters), self.tune_parameters)
        results.append(
            ('model', 'model type', 'PLSR quantitative (optimized)'))
    else:
        print("Building Quantitative PLSR")
        self.estimator = PLS_r(**self.estimator_parameters)
        results.append(('model', 'model type', 'PLSR quantitative'))

    if self.conformal:
        underlying_model = RegressorAdapter(self.estimator)
        # The original first built a KNeighborsRegressor(n_neighbors=1)
        # normalizer and overwrote it on the very next line; only the
        # assignment that took effect is kept.
        normalizing_model = RegressorAdapter(self.estimator)
        normalizer = RegressorNormalizer(
            underlying_model, normalizing_model, AbsErrorErrFunc())
        nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
        self.conformal_pred = AggregatedCp(IcpRegressor(nc),
                                           BootstrapSampler())
        # self.conformal_pred = AggregatedCp(IcpRegressor(RegressorNc(RegressorAdapter(self.estimator))),
        #                                    BootstrapSampler())
        self.conformal_pred.fit(X, Y)
        # overrides non-conformal
        results.append(
            ('model', 'model type', 'conformal PLSR quantitative'))

    self.estimator.fit(X, Y)
    return True, results
def test_acp_regression_tree(self):
    """Run three aggregated conformal regressors on the diabetes data.

    Two thirds of a random permutation of the samples are used for
    training, the rest for testing. For each sampler the error rate at
    significance 0.1 and a table of interval bounds next to the true
    target are printed.
    """
    # -----------------------------------------------------------------------------
    # Experiment setup
    # -----------------------------------------------------------------------------
    data = load_diabetes()

    idx = np.random.permutation(data.target.size)
    split_at = int(2 * idx.size / 3)
    train, test = idx[:split_at], idx[split_at:]

    truth = data.target[test]
    columns = ["min", "max", "truth"]
    significance = 0.1

    # -----------------------------------------------------------------------------
    # Define models
    # -----------------------------------------------------------------------------
    sampler_by_label = (
        ("ACP-RandomSubSampler", RandomSubSampler),
        ("ACP-CrossSampler", CrossSampler),
        ("ACP-BootstrapSampler", BootstrapSampler),
    )
    models = {
        label: AggregatedCp(
            IcpRegressor(
                RegressorNc(RegressorAdapter(DecisionTreeRegressor()))),
            sampler_cls(),
        )
        for label, sampler_cls in sampler_by_label
    }

    # -----------------------------------------------------------------------------
    # Train, predict and evaluate
    # -----------------------------------------------------------------------------
    x_train, y_train = data.data[train, :], data.target[train]
    for name, model in models.items():
        model.fit(x_train, y_train)

        prediction = model.predict(data.data[test, :])
        prediction_sign = model.predict(data.data[test, :],
                                        significance=significance)

        # Interval bounds side by side with the true target value.
        table = np.vstack((prediction_sign.T, truth)).T
        df = pd.DataFrame(table, columns=columns)

        error_rate = reg_mean_errors(prediction, truth, significance)
        print("\n{}".format(name))
        print("Error rate: {}".format(error_rate))
        print(df)
def CF_QualCal(X, Y, estimator):
    """Build and fit an aggregated conformal classifier around *estimator*.

    The classifier uses a margin error function and a bootstrap sampler and
    is fitted on the complete ``X``/``Y``.

    Returns the fitted aggregated conformal classifier.
    """
    nonconformity = ClassifierNc(ClassifierAdapter(estimator), MarginErrFunc())
    inductive = IcpClassifier(nonconformity)
    acp = AggregatedCp(inductive, BootstrapSampler())
    acp.fit(X, Y)

    # Single inductive-predictor variant kept from the original for reference:
    # X_train, X_test, y_train, y_test = train_test_split(
    #     X, Y, test_size=0.30, random_state=42)
    # icp = IcpClassifier(ClassifierNc(ClassifierAdapter(estimator),
    #                     MarginErrFunc()))
    # icp.fit(X_train, y_train)
    # icp.calibrate(X_test, y_test)
    return acp
def SelectLabeled(self, labeled_data_x, labeled_data_y, unlabeled_data_x):
    """Pick the unlabeled samples the conformal model labels confidently.

    The initial labeled data (plus ``labeled_data_x``/``labeled_data_y``
    when non-empty) trains an aggregated conformal decision-tree
    classifier, which then predicts ``unlabeled_data_x``. A sample is
    accepted when its row maximum exceeds ``config.confidence`` and one
    minus its row minimum exceeds ``config.credibility``; every other
    sample stays unlabeled.

    Returns a tuple ``(accepted_x, accepted_y, remaining_x)``:
    the accepted samples, the arg-max column index of their predictions,
    and the samples that were not accepted.
    """
    # just append train data to labeled data
    has_extra = len(labeled_data_x) > 0
    labeled_x = np.concatenate(
        (self.init_labeled_data_x, labeled_data_x
         )) if has_extra else self.init_labeled_data_x
    labeled_y = np.concatenate(
        (self.init_labeled_data_y, labeled_data_y
         )) if has_extra else self.init_labeled_data_y

    # create model to predict with confidence and credibility
    model = ClassifierAdapter(
        DecisionTreeClassifier(random_state=config.random_state,
                               min_samples_leaf=config.min_samples_leaf))
    model_acp = AggregatedCp(
        IcpClassifier(ClassifierNc(model), smoothing=True),
        BootstrapSampler())
    model_acp.fit(labeled_x, labeled_y)
    s = model_acp.predict(unlabeled_data_x)
    # print(s)

    # selection method
    # NOTE(review): the row maximum is compared with config.confidence and
    # 1 - row minimum with config.credibility; confirm the two thresholds
    # are not swapped.
    accepted = ((s.max(axis=1) > config.confidence) &
                (1 - s.min(axis=1) > config.credibility))

    # The rejected set is the exact complement of the accepted one. The
    # previous strict "<" test dropped samples sitting exactly on a
    # threshold from both groups, so they vanished from the pool.
    # flatnonzero always yields integer indices, also when nothing matches.
    labeled_ind = np.flatnonzero(accepted)
    unlabeled_ind = np.flatnonzero(~accepted)

    labeled_unlabeled_x = np.take(unlabeled_data_x, labeled_ind, axis=0)
    # NOTE(review): this is the arg-max column index, which presumably
    # matches the class label only when labels are 0..k-1 -- verify.
    labeled_unlabeled_y = np.take(s.argmax(axis=1), labeled_ind)
    unlabeled_data_x = np.take(unlabeled_data_x, unlabeled_ind, axis=0)

    return labeled_unlabeled_x, labeled_unlabeled_y, unlabeled_data_x
def build(self):
    '''Build a qualitative Gaussian Naive Bayes model from self.X / self.Y.

    Returns (False, message) if the model failed to initialise or is
    flagged as quantitative. Otherwise the estimator is created, an
    aggregated conformal classifier is built into self.conformal_pred when
    self.conformal is set, the plain estimator is fitted and
    (True, results) is returned, results being a list of
    (key, label, value) tuples.
    '''
    if self.failed:
        return False, "Error initiating model"

    x_matrix = self.X.copy()
    y_vector = self.Y.copy()

    results = [
        ('nobj', 'number of objects', self.nobj),
        ('nvarx', 'number of predictor variables', self.nvarx),
    ]

    if self.cv:
        self.cv = getCrossVal(self.cv, 46, self.n, self.p)

    # GNB is a classifier: refuse quantitative end-points
    if self.quantitative:
        print("GNB only applies to qualitative data")
        return False, "GNB only applies to qualitative data"

    print("Building GaussianNB model")
    print(self.estimator_parameters)
    self.estimator = GaussianNB(**self.estimator_parameters)
    results.append(('model', 'model type', 'GNB qualitative'))

    if self.conformal:
        nonconformity = ClassifierNc(ClassifierAdapter(self.estimator),
                                     MarginErrFunc())
        self.conformal_pred = AggregatedCp(IcpClassifier(nonconformity),
                                           BootstrapSampler())
        self.conformal_pred.fit(x_matrix, y_vector)
        # overrides non-conformal
        results.append(
            ('model', 'model type', 'conformal GNB qualitative'))

    self.estimator.fit(x_matrix, y_vector)
    return True, results
def build(self):
    '''Build a new SVM model with the X and Y numpy matrices

    Depending on the 'quantitative' parameter an SVR or SVC estimator is
    created (through self.optimize when 'tune' is set), fitted, and copied
    into self.estimator_temp. When 'conformal' is set, self.estimator is
    replaced by an aggregated conformal predictor fitted on the same data.

    Returns (True, results), results being a list of (key, label, value)
    tuples, or (False, message) when any stage raises. Failures used to be
    logged and then ignored, so the method carried on with a missing or
    half-built estimator and still reported success.
    '''
    # Make a copy of data matrices
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    quantitative = self.param.getVal('quantitative')

    # If tune then call gridsearch to optimize the estimator
    if self.param.getVal('tune'):
        try:
            # Check type of model
            if quantitative:
                self.optimize(X, Y, svm.SVR(**self.estimator_parameters),
                              self.tune_parameters)
                results.append(('model', 'model type',
                                'SVM quantitative (optimized)'))
            else:
                self.optimize(X, Y, svm.SVC(**self.estimator_parameters),
                              self.tune_parameters)
                results.append(
                    ('model', 'model type', 'SVM qualitative (optimized)'))
            LOG.debug('SVM estimator optimized')
        except Exception as e:
            message = f'Exception optimizing SVM estimator with exception {e}'
            LOG.error(message)
            return False, message
    else:
        try:
            LOG.info("Building SVM model")
            if quantitative:
                LOG.info("Building Quantitative SVM-R model")
                self.estimator = svm.SVR(**self.estimator_parameters)
                results.append(('model', 'model type', 'SVM quantitative'))
            else:
                self.estimator = svm.SVC(**self.estimator_parameters)
                results.append(('model', 'model type', 'SVM qualitative'))
        except Exception as e:
            message = f'Exception building SVM estimator with exception {e}'
            LOG.error(message)
            return False, message

    # Fit estimator to the data
    self.estimator.fit(X, Y)
    self.estimator_temp = copy(self.estimator)

    if not self.param.getVal('conformal'):
        return True, results

    try:
        LOG.info("Building aggregated conformal SVM model")
        if quantitative:
            underlying_model = RegressorAdapter(self.estimator_temp)
            # normalizing_model = RegressorAdapter(
            #     KNeighborsRegressor(n_neighbors=5))
            normalizing_model = RegressorAdapter(self.estimator_temp)
            normalizer = RegressorNormalizer(underlying_model,
                                             normalizing_model,
                                             AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                             normalizer)
            # self.conformal_pred = AggregatedCp(IcpRegressor(
            #     RegressorNc(RegressorAdapter(self.estimator))),
            #     BootstrapSampler())
            self.estimator = AggregatedCp(IcpRegressor(nc),
                                          BootstrapSampler())
            self.estimator.fit(X, Y)
            # overrides non-conformal
            results.append(
                ('model', 'model type', 'conformal SVM quantitative'))
        else:
            self.estimator = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(
                        ClassifierAdapter(self.estimator_temp),
                        MarginErrFunc())),
                BootstrapSampler())
            self.estimator.fit(X, Y)
            # overrides non-conformal
            results.append(
                ('model', 'model type', 'conformal SVM qualitative'))
    except Exception as e:
        message = (f'Exception building aggregated conformal SVM '
                   f'estimator with exception {e}')
        LOG.error(message)
        return False, message

    return True, results
# Define models
# -----------------------------------------------------------------------------
# One aggregated conformal regressor per sampling strategy, each wrapping
# its own decision tree.
models = {
    'ACP-RandomSubSampler': AggregatedCp(
        IcpRegressor(RegressorNc(RegressorAdapter(DecisionTreeRegressor()))),
        RandomSubSampler()),
    'ACP-CrossSampler': AggregatedCp(
        IcpRegressor(RegressorNc(RegressorAdapter(DecisionTreeRegressor()))),
        CrossSampler()),
    'ACP-BootstrapSampler': AggregatedCp(
        IcpRegressor(RegressorNc(RegressorAdapter(DecisionTreeRegressor()))),
        BootstrapSampler())
}

# -----------------------------------------------------------------------------
# Train, predict and evaluate
# -----------------------------------------------------------------------------
# dict.items() instead of the Python-2-only dict.iteritems(), which raises
# AttributeError on Python 3.
for name, model in models.items():
    model.fit(data.data[train, :], data.target[train])
    prediction = model.predict(data.data[test, :])
    prediction_sign = model.predict(data.data[test, :],
                                    significance=significance)
    # Interval bounds side by side with the true target value.
    table = np.vstack((prediction_sign.T, truth)).T
    df = pd.DataFrame(table, columns=columns)
    print('\n{}'.format(name))
    print('Error rate: {}'.format(
        reg_mean_errors(prediction, truth, significance)))
def CF_qualitative_validation(self):
    ''' performs validation for conformal qualitative models

    Runs a 5-fold cross-validation (shuffled, random_state=46). In each
    fold an aggregated conformal classifier built around
    self.estimator_temp is fitted and used to predict the held-out
    objects at the 'conformalSignificance' level. Objects for which
    exactly one class is predicted feed the confusion matrix; the rest
    count as not predicted.

    Sets self.TP/TN/FP/FN, self.sensitivity, self.specificity, self.mcc,
    self.conformal_coverage and self.conformal_accuracy. A metric that
    cannot be computed (zero denominator) is set to '-'.

    Returns (True, results) with results['quality'] holding a list of
    (key, label, value) tuples. Exceptions raised during the
    cross-validation are logged and re-raised.
    '''
    # Make a copy of original matrices.
    X = self.X.copy()
    Y = self.Y.copy()

    # Total number of class 0 correct predictions.
    c0_correct_all = 0
    # Total number of class 0 incorrect predictions.
    c0_incorrect_all = 0
    # Total number of class 1 correct predictions.
    c1_correct_all = 0
    # Total number of class 1 incorrect predictions
    c1_incorrect_all = 0
    # Total number of instances out of the applicability domain.
    not_predicted_all = 0

    info = []

    kf = KFold(n_splits=5, shuffle=True, random_state=46)
    # Copy Y vector to use it as template to assign predictions
    Y_pred = copy.copy(Y).tolist()
    try:
        for train_index, test_index in kf.split(X):
            # Generate training and test sets
            X_train, X_test = X[train_index], X[test_index]
            Y_train = Y[train_index]

            # Create the aggregated conformal classifier.
            conformal_pred = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                 MarginErrFunc())),
                BootstrapSampler())

            # Fit the conformal classifier to the data
            conformal_pred.fit(X_train, Y_train)

            # Perform prediction on test set
            prediction = conformal_pred.predict(
                X_test, self.param.getVal('conformalSignificance'))

            # Assign the prediction the correct index.
            for index, el in enumerate(test_index):
                Y_pred[el] = prediction[index]

        # Iterate over the prediction and check the result
        for real_value, predicted in zip(Y, Y_pred):
            real = float(real_value)
            # Both classes or neither class predicted: not decidable.
            if predicted[0] == predicted[1]:
                not_predicted_all += 1
                continue
            if real == 0 and predicted[0] == True:
                c0_correct_all += 1
            if real == 0 and predicted[1] == True:
                c0_incorrect_all += 1
            if real == 1 and predicted[1] == True:
                c1_correct_all += 1
            if real == 1 and predicted[0] == True:
                c1_incorrect_all += 1

    except Exception as e:
        LOG.error(f'Qualitative conformal validation'
                  f' failed with exception: {e}')
        raise

    # Confusion matrix accumulated over all folds.
    self.TN = c0_correct_all
    self.FP = c0_incorrect_all
    self.TP = c1_correct_all
    self.FN = c1_incorrect_all

    info.append(('TP', 'True positives in cross-validation', self.TP))
    info.append(('TN', 'True negatives in cross-validation', self.TN))
    info.append(('FP', 'False positives in cross-validation', self.FP))
    info.append(('FN', 'False negatives in cross-validation', self.FN))

    # Compute sensitivity, specificity and MCC
    try:
        self.sensitivity = (self.TP / (self.TP + self.FN))
    except Exception as e:
        LOG.error(f'Failed to compute sensitivity with '
                  f'exception {e}')
        self.sensitivity = '-'

    try:
        self.specificity = (self.TN / (self.TN + self.FP))
    except Exception as e:
        LOG.error(f'Failed to compute specificity with '
                  f'exception {e}')
        self.specificity = '-'

    try:
        # Compute Matthews Correlation Coefficient
        mcc_denominator = np.sqrt(
            (self.TP + self.FP) * (self.TP + self.FN) *
            (self.TN + self.FP) * (self.TN + self.FN))
        # np.sqrt returns a numpy float, and dividing by a numpy zero
        # yields nan/inf with a warning instead of raising, so the
        # zero case has to be raised explicitly to reach the fallback.
        if mcc_denominator == 0:
            raise ZeroDivisionError('MCC denominator is zero')
        self.mcc = (((self.TP * self.TN) - (self.FP * self.FN)) /
                    mcc_denominator)
    except Exception as e:
        LOG.error(f'Failed to compute Mathews Correlation Coefficient '
                  f'exception {e}')
        self.mcc = '-'

    info.append(('Sensitivity', 'Sensitivity in cross-validation',
                 self.sensitivity))
    info.append(('Specificity', 'Specificity in cross-validation',
                 self.specificity))
    info.append(
        ('MCC', 'Matthews Correlation Coefficient in cross-validation',
         self.mcc))

    predicted_total = self.TN + self.FP + self.TP + self.FN
    try:
        # Compute coverage (% of compounds inside the applicability domain)
        self.conformal_coverage = predicted_total / (
            predicted_total + not_predicted_all)
    except Exception as e:
        LOG.error(f'Failed to compute conformal coverage with '
                  f'exception {e}')
        self.conformal_coverage = '-'

    try:
        # Compute accuracy (% of correct predictions)
        self.conformal_accuracy = (
            float(self.TN + self.TP) /
            float(self.FP + self.FN + self.TN + self.TP))
    except Exception as e:
        LOG.error(f'Failed to compute conformal accuracy with '
                  f'exception {e}')
        self.conformal_accuracy = '-'

    info.append(('Conformal_coverage', 'Conformal coverage',
                 self.conformal_coverage))
    info.append(('Conformal_accuracy', 'Conformal accuracy',
                 self.conformal_accuracy))

    results = {}
    results['quality'] = info
    #results ['classes'] = prediction
    return True, results
def CF_quantitative_validation(self):
    ''' Performs internal validation for conformal quantitative models

    Runs a K-fold cross-validation ('ModelValidationN' folds, shuffled,
    random_state=46). In each fold an aggregated conformal regressor
    built around self.estimator_temp is fitted and the held-out objects
    are predicted at the 'conformalSignificance' level, giving one
    interval (two bounds) per object.

    Sets self.conformal_interval_medians (row mean of the two bounds),
    self.conformal_accuracy (fraction of objects whose true value lies
    strictly inside its interval) and self.conformal_mean_interval
    (mean interval width); the last two are rounded to two decimals.

    Returns (True, results) with results['quality'] holding a list of
    (key, label, value) tuples. Exceptions raised during the
    cross-validation are logged and re-raised.
    '''
    # Make a copy of original matrices.
    X = self.X.copy()
    Y = self.Y.copy()

    info = []
    kf = KFold(n_splits=self.param.getVal('ModelValidationN'),
               shuffle=True, random_state=46)
    # Copy Y vector to use it as template to assign predictions
    Y_pred = copy.copy(Y).tolist()
    try:
        for train_index, test_index in kf.split(X):
            # Generate training and test sets
            X_train, X_test = X[train_index], X[test_index]
            Y_train = Y[train_index]

            # Create the aggregated conformal regressor.
            conformal_pred = AggregatedCp(
                IcpRegressor(
                    RegressorNc(RegressorAdapter(self.estimator_temp))),
                BootstrapSampler())

            # Fit conformal regressor to the data
            conformal_pred.fit(X_train, Y_train)

            # Perform prediction on test set
            prediction = conformal_pred.predict(
                X_test, self.param.getVal('conformalSignificance'))

            # Assign the prediction its original index
            for index, el in enumerate(test_index):
                Y_pred[el] = prediction[index]

    except Exception as e:
        LOG.error(f'Quantitative conformal validation'
                  f' failed with exception: {e}')
        raise

    Y_pred = np.asarray(Y_pred)
    first_bound = Y_pred[:, 0]
    second_bound = Y_pred[:, 1]

    # Mean interval width
    interval_mean = np.mean(np.abs(first_bound - second_bound))

    # Compare element-wise on flat vectors. Comparing an (n, 1) column
    # against a 1-D Y broadcasts to an (n, n) matrix, which counted every
    # object against every interval and corrupted the accuracy.
    y_true = np.asarray(Y).ravel()
    inside_interval = (first_bound < y_true) & (second_bound > y_true)

    # Fraction of objects whose true value lies inside its interval.
    accuracy = np.sum(inside_interval) / len(Y)

    # Row mean of the two bounds, i.e. the centre of each interval.
    self.conformal_interval_medians = (np.mean(Y_pred, axis=1))
    # Cut into two decimals.
    self.conformal_accuracy = float("{0:.2f}".format(accuracy))
    self.conformal_mean_interval = float("{0:.2f}".format(interval_mean))

    # Add quality metrics to results.
    info.append(('Conformal_mean_interval', 'Conformal mean interval',
                 self.conformal_mean_interval))
    info.append(('Conformal_accuracy', 'Conformal accuracy',
                 self.conformal_accuracy))
    info.append(
        ('Conformal_interval_medians', 'Conformal interval medians',
         self.conformal_interval_medians))
    info.append(('Conformal_prediction_ranges',
                 'Conformal prediction ranges', Y_pred))

    results = {}
    results['quality'] = info
    return True, results
def build(self):
    '''Build a quantitative PLSR model from self.X / self.Y.

    With 'tune' set, the estimator is optimized either through the
    parent-class grid search ('optimize' == 'auto') or through this
    class's own optimize method ('optimize' == 'manual'); otherwise a
    PLS_r estimator is instantiated directly. The 'optimize' key is
    removed from self.estimator_parameters before they reach PLS_r.
    The estimator is then fitted and, when 'conformal' is set, copied
    into self.estimator_temp and replaced by a fitted aggregated
    conformal regressor.

    Returns (True, results), results being a list of (key, label, value)
    tuples, or (False, message) on failure.
    '''
    # Make a copy of data matrices
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    if self.param.getVal('tune'):
        # .get() so that a missing key is reported as an unrecognized
        # tune type instead of escaping as a KeyError.
        optimize_mode = self.estimator_parameters.get('optimize')

        # Optimize estimator using sklearn-gridsearch
        if optimize_mode == 'auto':
            try:
                LOG.info('Optimizing PLSR using SK-LearnGridSearch')
                # Remove optimize key from parameter dictionary
                # to avoid sklearn estimator error (unexpected keyword)
                self.estimator_parameters.pop("optimize", None)
                super(PLSR, self).optimize(X, Y, PLS_r(
                    **self.estimator_parameters),
                    self.param.getDict('PLSR_optimize'))
            except Exception as e:
                LOG.error(f'Error performing SK-LearnGridSearch'
                          f' on PLSR estimator with exception {e}')
                return False, f'Error performing SK-LearnGridSearch on PLSR estimator with exception {e}'

        # Optimize using flame implementation (recommended)
        elif optimize_mode == 'manual':
            LOG.info('Optimizing PLSR using manual method')
            # Remove optimize key from parameter dictionary
            # to avoid sklearn estimator error (unexpected keyword)
            self.estimator_parameters.pop("optimize", None)
            success, message = self.optimize(X, Y, PLS_r(
                **self.estimator_parameters),
                self.param.getDict('PLSR_optimize'))
            if not success:
                return False, message
        else:
            LOG.error('Type of tune not recognized, check the input')
            return False, 'Type of tune not recognized, check the input'

        results.append(('model', 'model type',
                        'PLSR quantitative (optimized)'))
    else:
        LOG.info('Building Quantitative PLSR with no optimization')
        try:
            # Remove optimize key from parameters, as the sklearn
            # estimator does not accept it. A default is supplied so a
            # repeated build, where the key is already gone, still works.
            self.estimator_parameters.pop("optimize", None)
            self.estimator = PLS_r(**self.estimator_parameters)
        except Exception as e:
            LOG.error(f'Error at PLS_r instantiation with '
                      f'exception {e}')
            # The message used to name PLS_da, a different estimator.
            return False, f'Error at PLS_r instantiation with exception {e}'
        results.append(('model', 'model type', 'PLSR quantitative'))

    # Fit estimator to the data
    self.estimator.fit(X, Y)

    if not self.param.getVal('conformal'):
        return True, results

    self.estimator_temp = copy(self.estimator)
    try:
        LOG.info('Building PLSR aggregated conformal predictor')
        underlying_model = RegressorAdapter(self.estimator_temp)
        # normalizing_model = RegressorAdapter(
        #     KNeighborsRegressor(n_neighbors=1))
        normalizing_model = RegressorAdapter(self.estimator_temp)
        normalizer = RegressorNormalizer(underlying_model,
                                         normalizing_model,
                                         AbsErrorErrFunc())
        nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
        self.estimator = AggregatedCp(IcpRegressor(nc), BootstrapSampler())
    except Exception as e:
        LOG.error(f'Error building aggregated PLSR conformal'
                  f' regressor with exception: {e}')
        return False, f'Error building aggregated PLSR conformal regressor with exception: {e}'

    # self.conformal_pred = AggregatedCp(IcpRegressor(
    #     RegressorNc(RegressorAdapter(self.estimator))),
    #     BootstrapSampler())

    # Fit conformal estimator to the data
    self.estimator.fit(X, Y)
    # overrides non-conformal
    results.append(('model', 'model type', 'conformal PLSR quantitative'))
    return True, results
def build(self):
    '''Build a new XGBOOST model with the X and Y numpy matrices

    With 'tune' set, an XGBRegressor/XGBClassifier is created and handed
    to self.optimize; otherwise it is created and fitted directly. A copy
    of the estimator is stored in self.estimator_temp and, when
    'conformal' is set, self.estimator is replaced by a fitted aggregated
    conformal predictor.

    Returns (True, results), results being a list of (key, label, value)
    tuples, or (False, message) when xgboost cannot be imported or the
    optimization fails. Exceptions raised while building the plain or the
    conformal estimator propagate to the caller.
    '''
    try:
        from xgboost.sklearn import XGBClassifier
        from xgboost.sklearn import XGBRegressor
    except Exception as e:
        return False, 'XGboost not found, please revise your environment'

    # Make a copy of data matrices
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    quantitative = self.param.getVal('quantitative')

    # If tune then call gridsearch to optimize the estimator
    if self.param.getVal('tune'):
        LOG.info("Optimizing XGBOOST estimator")
        try:
            # Check type of model
            if quantitative:
                self.estimator = XGBRegressor(
                    **self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                results.append(('model','model type','XGBOOST quantitative (optimized)'))
            else:
                self.estimator = XGBClassifier(
                    **self.estimator_parameters)
                # NOTE(review): the original set 'num_class' = 2 on the
                # dict returned by get_params() and then discarded it, so
                # the value never reached the estimator; the dead
                # statements were removed. Use set_params() if it is needed.
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                results.append(('model','model type','XGBOOST qualitative (optimized)'))
        except Exception as e:
            return False, f'Exception optimizing XGBOOST estimator with exception {e}'
    else:
        # Errors here propagate to the caller, as before; the original
        # `return False, ...` placed after `raise e` was unreachable.
        if quantitative:
            LOG.info("Building Quantitative XGBOOST model")
            # params = {
            #     'objective': 'reg:squarederror',
            #     'missing': -99.99999,
            #     # 'max_depth': 20,
            #     # 'learning_rate': 1.0,
            #     # 'silent': 1,
            #     # 'n_estimators': 25
            # }
            # self.estimator = XGBRegressor(**params)
            self.estimator = XGBRegressor(**self.estimator_parameters)
            results.append(('model', 'model type', 'XGBOOST quantitative'))
        else:
            LOG.info("Building Qualitative XGBOOST model")
            # params = {
            #     'objective': 'binary:logistic',
            #     'max_depth': 3,
            #     #'learning_rate': 0.7,
            #     #'silent': 1,
            #     'n_estimators': 100
            # }
            self.estimator = XGBClassifier(**self.estimator_parameters)
            results.append(('model', 'model type', 'XGBOOST qualitative'))

        self.estimator.fit(X, Y)
        print(self.estimator)

    self.estimator_temp = copy(self.estimator)

    if not self.param.getVal('conformal'):
        return True, results

    # Create the conformal estimator. Errors propagate to the caller, as
    # before (the original return after `raise e` was unreachable).
    if quantitative:
        # Conformal regressor
        LOG.info("Building conformal Quantitative XGBOOST model")
        underlying_model = RegressorAdapter(self.estimator_temp)
        #normalizing_model = RegressorAdapter(
        #KNeighborsRegressor(n_neighbors=5))
        normalizing_model = RegressorAdapter(self.estimator_temp)
        normalizer = RegressorNormalizer(
            underlying_model, normalizing_model, AbsErrorErrFunc())
        nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
        # self.conformal_pred = AggregatedCp(IcpRegressor
        # (RegressorNc(RegressorAdapter(self.estimator))),
        #   BootstrapSampler())
        self.estimator = AggregatedCp(IcpRegressor(nc), BootstrapSampler())
        self.estimator.fit(X, Y)
        results.append(('model', 'model type', 'conformal XGBOOST quantitative'))
    else:
        # Conformal classifier
        LOG.info("Building conformal Qualitative XGBOOST model")
        self.estimator = AggregatedCp(
            IcpClassifier(
                ClassifierNc(
                    ClassifierAdapter(self.estimator_temp),
                    MarginErrFunc()
                )
            ),
            BootstrapSampler())
        # Fit estimator to the data
        self.estimator.fit(X, Y)
        results.append(('model', 'model type', 'conformal XGBOOST qualitative'))

    return True, results

## Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''
# Define models
# -----------------------------------------------------------------------------
# Aggregated, cross- and bootstrap-conformal classifiers, each wrapping its
# own decision tree.
models = {
    'ACP-RandomSubSampler': AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier()))),
        RandomSubSampler()),
    'ACP-CrossSampler': AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier()))),
        CrossSampler()),
    'ACP-BootstrapSampler': AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier()))),
        BootstrapSampler()),
    'CCP': CrossConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier())))),
    'BCP': BootstrapConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier()))))
}

# -----------------------------------------------------------------------------
# Train, predict and evaluate
# -----------------------------------------------------------------------------
# dict.items() instead of the Python-2-only dict.iteritems(), which raises
# AttributeError on Python 3.
for name, model in models.items():
    model.fit(data.data[train, :], data.target[train])
def build(self):
    '''Build a new DL model with the X and Y numpy matrices

    With 'tune' set, a KerasRegressor/KerasClassifier is created and
    handed to self.optimize; otherwise it is created around
    self.create_model and fitted directly. A clone of the estimator is
    stored in self.estimator_temp and, when 'conformal' is set,
    self.estimator is replaced by a fitted aggregated conformal predictor.

    Returns (True, results), results being a list of (key, label, value)
    tuples, or (False, message) when Keras cannot be imported or the
    optimization fails. Exceptions raised while building the plain or the
    conformal estimator propagate to the caller.
    '''
    try:
        from keras.wrappers.scikit_learn import KerasClassifier
        from keras.wrappers.scikit_learn import KerasRegressor
    except Exception as e:
        return False, 'Keras not found, please revise your environment'

    # Make a copy of data matrices
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    quantitative = self.param.getVal('quantitative')

    # If tune then call gridsearch to optimize the estimator
    if self.param.getVal('tune'):
        LOG.info("Optimizing Keras estimator")
        try:
            # Check type of model
            if quantitative:
                self.estimator = KerasRegressor(
                    **self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                results.append(('model', 'model type',
                                'KERAS quantitative (optimized)'))
            else:
                self.estimator = KerasClassifier(
                    **self.estimator_parameters)
                #params = self.estimator.get_params()
                #params['num_class'] = 2
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                results.append(('model', 'model type',
                                'KERAS qualitative (optimized)'))
        except Exception as e:
            return False, f'Exception optimizing KERAS estimator with exception {e}'
    else:
        # Errors here propagate to the caller, as before; the original
        # `return False, ...` placed after `raise e` was unreachable.
        if quantitative:
            LOG.info("Building Quantitative KERAS mode")
            self.estimator = KerasRegressor(
                build_fn=self.create_model,
                **self.estimator_parameters, verbose=0)
            results.append(
                ('model', 'model type', 'Keras quantitative'))
        else:
            LOG.info("Building Qualitative Keras model")
            self.estimator = KerasClassifier(
                build_fn=self.create_model, dim=self.X.shape[1],
                **self.estimator_parameters, verbose=0)
            results.append(
                ('model', 'model type', 'Keras qualitative'))

        self.estimator.fit(X, Y)
        print(self.estimator)

    self.estimator_temp = clone(self.estimator)

    if not self.param.getVal('conformal'):
        return True, results

    # Create the conformal estimator. Errors propagate to the caller, as
    # before (the original return after `raise e` was unreachable).
    if quantitative:
        # Conformal regressor
        LOG.info("Building conformal Quantitative Keras model")
        underlying_model = RegressorAdapter(self.estimator_temp)
        normalizing_model = RegressorAdapter(
            KNeighborsRegressor(n_neighbors=15))
        # normalizing_model = RegressorAdapter(self.estimator_temp)
        normalizer = RegressorNormalizer(underlying_model,
                                         normalizing_model,
                                         AbsErrorErrFunc())
        nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
        # self.conformal_pred = AggregatedCp(IcpRegressor
        # (RegressorNc(RegressorAdapter(self.estimator))),
        #   BootstrapSampler())
        self.estimator = AggregatedCp(IcpRegressor(nc), BootstrapSampler())
        self.estimator.fit(X, Y)
        results.append(
            ('model', 'model type', 'conformal Keras quantitative'))
    else:
        # Conformal classifier
        LOG.info("Building conformal Qualitative Keras model")
        self.estimator = AggregatedCp(
            IcpClassifier(
                ClassifierNc(ClassifierAdapter(self.estimator_temp),
                             MarginErrFunc())),
            BootstrapSampler())
        # Fit estimator to the data
        print('build finished')
        self.estimator.fit(X, Y)
        results.append(
            ('model', 'model type', 'conformal Keras qualitative'))

    # Return the collected results. The conformal path used to return an
    # empty list, discarding everything gathered above, unlike the
    # non-conformal path of this same method.
    return True, results
def CF_qualitative_validation(self):
    '''Perform validation for conformal qualitative models.

    The data is split three times into 75 % training / 25 % test sets. For
    each split an aggregated conformal classifier built around
    ``self.estimator`` is fitted and used to predict the test set at
    ``self.conformalSignificance``. Objects predicted in exactly one class
    are tallied into a confusion matrix; all other objects are counted as
    not predicted. The per-split counts are averaged (truncated to int) and
    stored in ``self.TN``, ``self.FP``, ``self.TP`` and ``self.FN``, from
    which sensitivity, specificity, MCC, conformal coverage and conformal
    accuracy are derived.

    Returns:
        ``(True, (results,))`` where ``results`` is a list of
        ``(key, description, value)`` tuples.

    Raises:
        Exception: any error raised while splitting, fitting or predicting
            is logged and re-raised.
        ZeroDivisionError: if the averaged counts used as a denominator
            (e.g. ``TP + FN`` or ``TN + FP``) add up to zero.
    '''
    # Make a copy of original matrices.
    X = self.X.copy()
    Y = self.Y.copy()

    # Number of external validations for the aggregated conformal estimator.
    # NOTE(review): only the length of this list is used; the split index
    # (0, 1, 2) is what is passed as random_state below. Kept unchanged so
    # that results stay comparable with previous runs - confirm intent.
    seeds = [5, 7, 35]

    # Per-split counts of correct / incorrect predictions for each class.
    c0_correct_all = []
    c0_incorrect_all = []
    c1_correct_all = []
    c1_incorrect_all = []
    # Per-split counts of instances out of the applicability domain.
    not_predicted_all = []
    results = []

    try:
        for split_index in range(len(seeds)):
            # Generate training and test sets
            X_train, X_test, Y_train, Y_test = train_test_split(
                X, Y,
                test_size=0.25,
                random_state=split_index,
                shuffle=True)

            # Create the aggregated conformal classifier.
            conformal_pred = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator),
                                 MarginErrFunc())),
                BootstrapSampler())

            # Fit the conformal classifier to the data
            conformal_pred.fit(X_train, Y_train)

            # Perform prediction on test set
            prediction = conformal_pred.predict(X_test,
                                                self.conformalSignificance)

            c0_correct = 0
            c1_correct = 0
            not_predicted = 0
            c0_incorrect = 0
            c1_incorrect = 0

            # Walk the test set and the predictions in lockstep.
            # Assumes each prediction row holds one membership flag per
            # class (class 0, class 1) - TODO confirm.
            for real_value, predicted in zip(Y_test, prediction):
                real = float(real_value)
                if predicted[0] != predicted[1]:
                    # Exactly one class was predicted.
                    if real == 0 and predicted[0]:
                        c0_correct += 1
                    if real == 0 and predicted[1]:
                        c0_incorrect += 1
                    if real == 1 and predicted[1]:
                        c1_correct += 1
                    if real == 1 and predicted[0]:
                        c1_incorrect += 1
                else:
                    # Both classes or none: no usable prediction.
                    not_predicted += 1

            # Add the results to the lists.
            c0_correct_all.append(c0_correct)
            c0_incorrect_all.append(c0_incorrect)
            c1_correct_all.append(c1_correct)
            c1_incorrect_all.append(c1_incorrect)
            not_predicted_all.append(not_predicted)

    except Exception as e:
        LOG.error(f'Qualitative conformal validation'
                  f' failed with exception: {e}')
        raise

    # Get the mean confusion matrix. The builtin int() is used because the
    # np.int alias was removed from NumPy; truncation is identical.
    self.TN = int(np.mean(c0_correct_all))
    self.FP = int(np.mean(c0_incorrect_all))
    self.TP = int(np.mean(c1_correct_all))
    self.FN = int(np.mean(c1_incorrect_all))
    not_predicted_all = int(np.mean(not_predicted_all))

    results.append(('TP', 'True positives in cross-validation', self.TP))
    results.append(('TN', 'True negatives in cross-validation', self.TN))
    results.append(('FP', 'False positives in cross-validation', self.FP))
    results.append(('FN', 'False negatives in cross-validation', self.FN))

    # Compute sensitivity and specificity
    self.sensitivity = (self.TP / (self.TP + self.FN))
    self.specificity = (self.TN / (self.TN + self.FP))

    # Compute Matthews Correlation Coefficient
    self.mcc = (((self.TP * self.TN) - (self.FP * self.FN)) /
                np.sqrt((self.TP + self.FP) * (self.TP + self.FN) *
                        (self.TN + self.FP) * (self.TN + self.FN)))

    results.append(('Sensitivity', 'Sensitivity in cross-validation',
                    self.sensitivity))
    results.append(('Specificity', 'Specificity in cross-validation',
                    self.specificity))
    results.append(
        ('MCC', 'Matthews Correlation Coefficient in cross-validation',
         self.mcc))

    # Compute coverage (% of compounds inside the applicability domain)
    predicted_total = self.TN + self.FP + self.TP + self.FN
    self.conformal_coverage = predicted_total / (
        predicted_total + not_predicted_all)

    # Compute accuracy (% of correct predictions)
    self.conformal_accuracy = float(self.TN + self.TP) / float(
        self.FP + self.FN + self.TN + self.TP)

    results.append(('Conformal_coverage', 'Conformal coverage',
                    self.conformal_coverage))
    results.append(('Conformal_accuracy', 'Conformal accuracy',
                    self.conformal_accuracy))

    return True, (results, )
def build(self):
    '''Build a new RF model with the X and Y numpy matrices.

    Depending on ``self.tune``, ``self.quantitative`` and
    ``self.conformal`` the method optimizes or instantiates a random forest
    regressor / classifier, optionally fits an aggregated conformal
    predictor into ``self.conformal_pred`` and finally fits
    ``self.estimator`` on the full data.

    Returns:
        ``False`` when ``self.failed`` is set, otherwise ``(True, results)``
        where ``results`` is a list of ``(key, description, value)`` tuples.
        NOTE(review): the failure value is a bare bool, not a tuple; kept
        unchanged for backward compatibility with existing callers.
    '''
    if self.failed:
        return False

    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    if self.cv:
        # Replace the cross-validation setting with the object returned by
        # getCrossVal.
        self.cv = getCrossVal(self.cv,
                              self.estimator_parameters["random_state"],
                              self.n, self.p)

    if self.tune:
        if self.quantitative:
            self.optimize(X, Y, RandomForestRegressor(),
                          self.tune_parameters)
            results.append(
                ('model', 'model type', 'RF quantitative (optimized)'))
        else:
            self.optimize(X, Y, RandomForestClassifier(),
                          self.tune_parameters)
            results.append(
                ('model', 'model type', 'RF qualitative (optimized)'))
    else:
        if self.quantitative:
            log.info("Building Quantitative RF model")
            # 'class_weight' is dropped before building the regressor.
            self.estimator_parameters.pop('class_weight', None)
            self.estimator = RandomForestRegressor(
                **self.estimator_parameters)
            results.append(('model', 'model type', 'RF quantitative'))
        else:
            log.info("Building Qualitative RF model")
            self.estimator = RandomForestClassifier(
                **self.estimator_parameters)
            results.append(('model', 'model type', 'RF qualitative'))

    if self.conformal:
        if self.quantitative:
            underlying_model = RegressorAdapter(self.estimator)
            # NOTE(review): a KNeighborsRegressor(n_neighbors=5) adapter
            # used to be created here and was immediately overwritten by
            # the line below; the dead assignment was removed. Confirm
            # whether the KNN normalizer was the intended one.
            normalizing_model = RegressorAdapter(self.estimator)
            normalizer = RegressorNormalizer(underlying_model,
                                             normalizing_model,
                                             AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                             normalizer)
            self.conformal_pred = AggregatedCp(IcpRegressor(nc),
                                               BootstrapSampler())
            self.conformal_pred.fit(X, Y)
            # overrides non-conformal
            results.append(
                ('model', 'model type', 'conformal RF quantitative'))
        else:
            self.conformal_pred = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator),
                                 MarginErrFunc())),
                BootstrapSampler())
            self.conformal_pred.fit(X, Y)
            # overrides non-conformal
            results.append(
                ('model', 'model type', 'conformal RF qualitative'))

    self.estimator.fit(X, Y)

    return True, results

#### Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''
def test_acp_classification_tree(self):
    """Smoke-test the aggregated conformal classifiers on the iris data.

    Five ensemble variants built on decision-tree ICPs are fitted on a
    random two-thirds of the data and used to predict the remaining third;
    the error rate and the prediction table are printed for each.
    """
    # -------------------------------------------------------------------------
    # Experiment setup
    # -------------------------------------------------------------------------
    iris = load_iris()
    shuffled = np.random.permutation(iris.target.size)
    cut = int(2 * shuffled.size / 3)
    train_idx = shuffled[:cut]
    test_idx = shuffled[cut:]

    truth = iris.target[test_idx].reshape(-1, 1)
    columns = ["C-{}".format(label) for label in np.unique(iris.target)]
    columns = columns + ["truth"]
    significance = 0.1

    # -------------------------------------------------------------------------
    # Define models
    # -------------------------------------------------------------------------
    def fresh_icp():
        # Every ensemble gets its own, independent underlying ICP.
        return IcpClassifier(
            ClassifierNc(ClassifierAdapter(DecisionTreeClassifier())))

    models = {
        "ACP-RandomSubSampler": AggregatedCp(fresh_icp(),
                                             RandomSubSampler()),
        "ACP-CrossSampler": AggregatedCp(fresh_icp(), CrossSampler()),
        "ACP-BootstrapSampler": AggregatedCp(fresh_icp(),
                                             BootstrapSampler()),
        "CCP": CrossConformalClassifier(fresh_icp()),
        "BCP": BootstrapConformalClassifier(fresh_icp()),
    }

    # -------------------------------------------------------------------------
    # Train, predict and evaluate
    # -------------------------------------------------------------------------
    train_x = iris.data[train_idx, :]
    train_y = iris.target[train_idx]
    for name, model in models.items():
        model.fit(train_x, train_y)
        prediction = model.predict(iris.data[test_idx, :],
                                   significance=significance)
        frame = pd.DataFrame(np.hstack((prediction, truth)),
                             columns=columns)
        error_rate = class_mean_errors(prediction, truth, significance)
        print("\n{}".format(name))
        print("Error rate: {}".format(error_rate))
        print(frame)

    self.assertTrue(True)
def build(self):
    '''Build a new RF model with the X and Y numpy matrices.

    A random forest regressor or classifier (chosen by the 'quantitative'
    parameter) is either optimized (when 'tune' is set) or fitted directly.
    When 'conformal' is set, a copy of that estimator is wrapped in an
    aggregated conformal predictor, which replaces ``self.estimator`` and
    is fitted on the same data.

    Returns:
        ``(True, results)`` on success, where ``results`` is a list of
        ``(key, description, value)`` tuples, or ``(False, message)`` when
        any of the steps raises an exception.
    '''
    # Work on copies of the data matrices
    X = self.X.copy()
    Y = self.Y.copy()

    results = [
        ('nobj', 'number of objects', self.nobj),
        ('nvarx', 'number of predictor variables', self.nvarx),
        ('model', 'model type', 'RF'),
    ]

    conformal = self.param.getVal('conformal')

    if self.param.getVal('tune'):
        # Grid search over the tune parameters
        LOG.info("Optimizing RF estimator")
        try:
            if self.param.getVal('quantitative'):
                forest_cls = RandomForestRegressor
            else:
                forest_cls = RandomForestClassifier
            self.estimator = forest_cls(**self.estimator_parameters)
            self.optimize(X, Y, self.estimator, self.tune_parameters)
        except Exception as e:
            return False, f'Exception optimizing RF estimator with exception {e}'
    else:
        try:
            if self.param.getVal('quantitative'):
                forest_cls = RandomForestRegressor
                banner = "Building Quantitative RF model"
            else:
                forest_cls = RandomForestClassifier
                banner = "Building Qualitative RF model"
            self.estimator = forest_cls(**self.estimator_parameters)
            # The plain-model message is only logged when no conformal
            # wrapper will be built afterwards.
            if not conformal:
                LOG.info(banner)
            self.estimator.fit(X, Y)
        except Exception as e:
            return False, f'Exception building RF estimator with exception {e}'

    if not conformal:
        return True, results

    self.estimator_temp = copy(self.estimator)

    # Wrap the copied estimator in the conformal predictor
    try:
        if self.param.getVal('quantitative'):
            # Conformal regressor, normalized with a KNN model
            conformal_settings = self.param.getDict('conformal_settings')
            LOG.info("Building conformal Quantitative RF model")
            underlying_model = RegressorAdapter(self.estimator_temp)
            knn = KNeighborsRegressor(
                n_neighbors=conformal_settings['KNN_NN'])
            self.normalizing_model = RegressorAdapter(knn)
            normalizer = RegressorNormalizer(underlying_model,
                                             copy(self.normalizing_model),
                                             AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                             normalizer)
            self.estimator = AggregatedCp(IcpRegressor(nc),
                                          BootstrapSampler())
        else:
            # Conformal classifier
            LOG.info("Building conformal Qualitative RF model")
            nc = ClassifierNc(ClassifierAdapter(self.estimator_temp),
                              MarginErrFunc())
            self.estimator = AggregatedCp(IcpClassifier(nc),
                                          BootstrapSampler())

        # Fit the conformal estimator to the data
        self.estimator.fit(X, Y)
    except Exception as e:
        return False, f'Exception building conformal RF estimator with exception {e}'

    return True, results

## Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''