def data_loader(dataset_name, random_seed=42, fix=False, test_ratio=0.2):
    """Load a regression dataset, split into train/test, and normalize.

    Parameters
    ----------
    dataset_name : string, name of the dataset; must be one of the
                   supported names listed below
    random_seed : integer, seed used for the train/test split
    fix : boolean, unused; kept for backward compatibility with callers
    test_ratio : numeric, fraction of the data held out as the test set

    Returns
    -------
    X_train : numpy array, training features scaled to (0, 1)
    y_train : numpy array, training labels divided by the mean absolute
              training response
    X_test : numpy array, test features scaled with the train-fitted scaler
    y_test : numpy array, test labels scaled with the train-derived factor

    Raises
    ------
    AssertionError : if dataset_name is not a supported dataset
    """
    supported_datasets = [
        "bostonHousing", "energy", "wine-quality-red", "yacht", "meps_19",
        "meps_20", "meps_21", "star", "facebook_1", "facebook_2", "bio",
        'blog_data', "concrete", "bike", "community"
    ]
    # Guard clause: fail fast on an unknown dataset name instead of
    # nesting the whole function body in an if-block.
    if dataset_name not in supported_datasets:
        raise AssertionError('Error: wrong data name')

    X, y = datasets.GetDataset(dataset_name, base_path='datasets/')

    # Dataset is divided into test and train data based on test_ratio
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, random_state=random_seed)
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test)
    y_test = np.asarray(y_test)

    # Features are normalized to (0, 1); the scaler is fitted on the
    # training set only, so no test information leaks into the scaling.
    # (The original indexed X_train with an identity permutation here,
    # which only forced a pointless full copy.)
    scalerX = MinMaxScaler(feature_range=(0, 1))
    scalerX = scalerX.fit(X_train)
    X_train = scalerX.transform(X_train)
    X_test = scalerX.transform(X_test)

    # Scale the labels by dividing each by the mean absolute response of
    # the training set; the same factor is applied to the test labels.
    mean_ytrain = np.mean(np.abs(y_train))
    y_train = np.squeeze(y_train) / mean_ytrain
    y_test = np.squeeze(y_test) / mean_ytrain

    return X_train, y_train, X_test, y_test
def run_equalized_coverage_experiment(dataset_name, method, seed, save_to_csv=True, test_ratio=0.2):
    """Run the equalized-coverage experiment for one dataset/method/seed.

    Fits three conformal predictors — marginal, conditional with joint
    calibration, and conditional with one model per group — computes the
    per-sample coverage and interval length for the two protected groups
    (reported as "<dataset>_non_white" / "<dataset>_white"), and optionally
    appends aggregated statistics to ./results/results.csv.

    Parameters
    ----------
    dataset_name : string, base name of the dataset to load
    method : string, black-box model: "net" (conformal mean-regression
             network) or "qnet" (CQR quantile network)
    seed : integer, random seed; also used for the train/test split
    save_to_csv : boolean, append results to ./results/results.csv (True)
    test_ratio : numeric, fraction of the data held out as the test set

    NOTE(review): relies on module-level names `condition` (group
    membership function applied to (x, y) pairs) and `append_statistics`,
    defined elsewhere in this file — confirm they are in scope.
    """
    random_state_train_test = seed
    # seed every source of randomness for reproducibility
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # crude environment detection: '/scratch' exists only on the cluster
    if os.path.isdir('/scratch'):
        local_machine = 0
    else:
        local_machine = 1
    if local_machine:
        dataset_base_path = '/Users/romano/mydata/regression_data/'
    else:
        dataset_base_path = '/scratch/users/yromano/data/regression_data/'

    # desired miscoverage error
    alpha = 0.1
    # desired quantile levels
    quantiles = [0.05, 0.95]

    # labels used to report results for the two protected groups
    dataset_name_group_0 = dataset_name + "_non_white"
    dataset_name_group_1 = dataset_name + "_white"

    # load the dataset
    X, y = datasets.GetDataset(dataset_name, dataset_base_path)

    # divide the dataset into test and train based on the test_ratio parameter
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, random_state=random_state_train_test)

    # compute input dimensions
    n_train = x_train.shape[0]
    in_shape = x_train.shape[1]

    # divide the data into proper training set and calibration set
    idx = np.random.permutation(n_train)
    n_half = int(np.floor(n_train / 2))
    idx_train, idx_cal = idx[:n_half], idx[n_half:2 * n_half]

    # zero mean and unit variance scaling, fitted on the proper training
    # set only so no calibration/test information leaks into the scaler
    scalerX = StandardScaler()
    scalerX = scalerX.fit(x_train[idx_train])

    # scale
    x_train = scalerX.transform(x_train)
    x_test = scalerX.transform(x_test)

    # log-transform the response (assumes nonnegative y — TODO confirm)
    y_train = np.log(1.0 + y_train)
    y_test = np.log(1.0 + y_test)

    # reshape the data
    x_train = np.asarray(x_train)
    y_train = np.squeeze(np.asarray(y_train))
    x_test = np.asarray(x_test)
    y_test = np.squeeze(np.asarray(y_test))

    # display basic information
    print("Dataset: %s" % (dataset_name))
    print(
        "Dimensions: train set (n=%d, p=%d) ; test set (n=%d, p=%d)" %
        (x_train.shape[0], x_train.shape[1], x_test.shape[0], x_test.shape[1]))

    # accumulators filled by append_statistics, one entry per group/method
    dataset_name_vec = []
    method_vec = []
    coverage_vec = []
    length_vec = []
    seed_vec = []
    test_ratio_vec = []

    if method == "net":
        # pytorch's optimizer object
        nn_learn_func = torch.optim.Adam
        # number of epochs
        epochs = 1000
        # learning rate
        lr = 0.0005
        # mini-batch size
        batch_size = 64
        # hidden dimension of the network
        hidden_size = 64
        # dropout regularization rate
        dropout = 0.1
        # weight decay regularization
        wd = 1e-6
        # ratio of held-out data, used in cross-validation
        cv_test_ratio = 0.1
        # seed for splitting the data in cross-validation.
        # Also used as the seed in quantile random forests function
        cv_random_state = 1

        # ---- marginal conformal prediction ----
        model = helper.MSENet_RegressorAdapter(model=None,
                                               fit_params=None,
                                               in_shape=in_shape,
                                               hidden_size=hidden_size,
                                               learn_func=nn_learn_func,
                                               epochs=epochs,
                                               batch_size=batch_size,
                                               dropout=dropout,
                                               lr=lr,
                                               wd=wd,
                                               test_ratio=cv_test_ratio,
                                               random_state=cv_random_state)
        nc = RegressorNc(model, SignErrorErrFunc())
        y_lower, y_upper = helper.run_icp(nc, x_train, y_train, x_test,
                                          idx_train, idx_cal, alpha)
        method_name = "Marginal Conformal Neural Network"
        # compute and print average coverage and average length
        coverage_sample, length_sample = helper.compute_coverage_per_sample(
            y_test, y_lower, y_upper, alpha, method_name, x_test, condition)
        append_statistics(coverage_sample, length_sample, method_name,
                          dataset_name_vec, method_vec, coverage_vec,
                          length_vec, seed_vec, test_ratio_vec, seed,
                          test_ratio, dataset_name_group_0,
                          dataset_name_group_1)

        # ---- conditional conformal prediction (joint calibration) ----
        model = helper.MSENet_RegressorAdapter(model=None,
                                               fit_params=None,
                                               in_shape=in_shape,
                                               hidden_size=hidden_size,
                                               learn_func=nn_learn_func,
                                               epochs=epochs,
                                               batch_size=batch_size,
                                               dropout=dropout,
                                               lr=lr,
                                               wd=wd,
                                               test_ratio=cv_test_ratio,
                                               random_state=cv_random_state)
        nc = RegressorNc(model, SignErrorErrFunc())
        # passing `condition` makes the calibration group-conditional
        y_lower, y_upper = helper.run_icp(nc, x_train, y_train, x_test,
                                          idx_train, idx_cal, alpha,
                                          condition)
        method_name = "Conditional Conformal Neural Network (joint)"
        # compute and print average coverage and average length
        coverage_sample, length_sample = helper.compute_coverage_per_sample(
            y_test, y_lower, y_upper, alpha, method_name, x_test, condition)
        append_statistics(coverage_sample, length_sample, method_name,
                          dataset_name_vec, method_vec, coverage_vec,
                          length_vec, seed_vec, test_ratio_vec, seed,
                          test_ratio, dataset_name_group_0,
                          dataset_name_group_1)

        # ---- conditional conformal prediction (one model per group) ----
        category_map = np.array([
            condition((x_train[i, :], None)) for i in range(x_train.shape[0])
        ])
        categories = np.unique(category_map)
        estimator_list = []
        nc_list = []
        for i in range(len(categories)):
            # define a mean-regression network per group
            estimator_list.append(
                helper.MSENet_RegressorAdapter(model=None,
                                               fit_params=None,
                                               in_shape=in_shape,
                                               hidden_size=hidden_size,
                                               learn_func=nn_learn_func,
                                               epochs=epochs,
                                               batch_size=batch_size,
                                               dropout=dropout,
                                               lr=lr,
                                               wd=wd,
                                               test_ratio=cv_test_ratio,
                                               random_state=cv_random_state))
            # define the conformal object per group
            nc_list.append(RegressorNc(estimator_list[i], SignErrorErrFunc()))
        # run the split-conformal procedure separately within each group
        y_lower, y_upper = helper.run_icp_sep(nc_list, x_train, y_train,
                                              x_test, idx_train, idx_cal,
                                              alpha, condition)
        method_name = "Conditional Conformal Neural Network (groupwise)"
        # compute and print average coverage and average length
        coverage_sample, length_sample = helper.compute_coverage_per_sample(
            y_test, y_lower, y_upper, alpha, method_name, x_test, condition)
        append_statistics(coverage_sample, length_sample, method_name,
                          dataset_name_vec, method_vec, coverage_vec,
                          length_vec, seed_vec, test_ratio_vec, seed,
                          test_ratio, dataset_name_group_0,
                          dataset_name_group_1)

    if method == "qnet":
        # pytorch's optimizer object
        nn_learn_func = torch.optim.Adam
        # number of epochs
        epochs = 1000
        # learning rate
        lr = 0.0005
        # mini-batch size
        batch_size = 64
        # hidden dimension of the network
        hidden_size = 64
        # dropout regularization rate
        dropout = 0.1
        # weight decay regularization
        wd = 1e-6
        # desired quantiles
        quantiles_net = [0.05, 0.95]
        # ratio of held-out data, used in cross-validation
        cv_test_ratio = 0.1
        # seed for splitting the data in cross-validation.
        # Also used as the seed in quantile random forests function
        cv_random_state = 1

        # ---- marginal CQR ----
        # define quantile neural network model
        quantile_estimator = helper.AllQNet_RegressorAdapter(
            model=None,
            fit_params=None,
            in_shape=in_shape,
            hidden_size=hidden_size,
            quantiles=quantiles_net,
            learn_func=nn_learn_func,
            epochs=epochs,
            batch_size=batch_size,
            dropout=dropout,
            lr=lr,
            wd=wd,
            test_ratio=cv_test_ratio,
            random_state=cv_random_state,
            use_rearrangement=False)
        # define the CQR object, computing the absolute residual error of points
        # located outside the estimated quantile neural network band
        nc = RegressorNc(quantile_estimator, QuantileRegAsymmetricErrFunc())
        # run CQR procedure
        y_lower, y_upper = helper.run_icp(nc, x_train, y_train, x_test,
                                          idx_train, idx_cal, alpha)
        method_name = "Marginal CQR Neural Network"
        # compute and print average coverage and average length
        coverage_sample, length_sample = helper.compute_coverage_per_sample(
            y_test, y_lower, y_upper, alpha, method_name, x_test, condition)
        append_statistics(coverage_sample, length_sample, method_name,
                          dataset_name_vec, method_vec, coverage_vec,
                          length_vec, seed_vec, test_ratio_vec, seed,
                          test_ratio, dataset_name_group_0,
                          dataset_name_group_1)

        # ---- conditional CQR (joint calibration) ----
        # define qnet model
        quantile_estimator = helper.AllQNet_RegressorAdapter(
            model=None,
            fit_params=None,
            in_shape=in_shape,
            hidden_size=hidden_size,
            quantiles=quantiles_net,
            learn_func=nn_learn_func,
            epochs=epochs,
            batch_size=batch_size,
            dropout=dropout,
            lr=lr,
            wd=wd,
            test_ratio=cv_test_ratio,
            random_state=cv_random_state,
            use_rearrangement=False)
        # define the CQR object
        nc = RegressorNc(quantile_estimator, QuantileRegAsymmetricErrFunc())
        # run CQR procedure with group-conditional calibration
        y_lower, y_upper = helper.run_icp(nc, x_train, y_train, x_test,
                                          idx_train, idx_cal, alpha,
                                          condition)
        method_name = "Conditional CQR Neural Network (joint)"
        # compute and print average coverage and average length
        coverage_sample, length_sample = helper.compute_coverage_per_sample(
            y_test, y_lower, y_upper, alpha, method_name, x_test, condition)
        append_statistics(coverage_sample, length_sample, method_name,
                          dataset_name_vec, method_vec, coverage_vec,
                          length_vec, seed_vec, test_ratio_vec, seed,
                          test_ratio, dataset_name_group_0,
                          dataset_name_group_1)

        # ---- conditional CQR (one model per group) ----
        category_map = np.array([
            condition((x_train[i, :], None)) for i in range(x_train.shape[0])
        ])
        categories = np.unique(category_map)
        quantile_estimator_list = []
        nc_list = []
        for i in range(len(categories)):
            # define a quantile network per group
            quantile_estimator_list.append(
                helper.AllQNet_RegressorAdapter(model=None,
                                                fit_params=None,
                                                in_shape=in_shape,
                                                hidden_size=hidden_size,
                                                quantiles=quantiles_net,
                                                learn_func=nn_learn_func,
                                                epochs=epochs,
                                                batch_size=batch_size,
                                                dropout=dropout,
                                                lr=lr,
                                                wd=wd,
                                                test_ratio=cv_test_ratio,
                                                random_state=cv_random_state,
                                                use_rearrangement=False))
            # append a CQR object
            nc_list.append(
                RegressorNc(quantile_estimator_list[i],
                            QuantileRegAsymmetricErrFunc()))
        # run CQR procedure separately within each group
        y_lower, y_upper = helper.run_icp_sep(nc_list, x_train, y_train,
                                              x_test, idx_train, idx_cal,
                                              alpha, condition)
        method_name = "Conditional CQR Neural Network (groupwise)"
        # compute and print average coverage and average length
        coverage_sample, length_sample = helper.compute_coverage_per_sample(
            y_test, y_lower, y_upper, alpha, method_name, x_test, condition)
        append_statistics(coverage_sample, length_sample, method_name,
                          dataset_name_vec, method_vec, coverage_vec,
                          length_vec, seed_vec, test_ratio_vec, seed,
                          test_ratio, dataset_name_group_0,
                          dataset_name_group_1)

    # ---- summary / persistence ----
    coverage_str = 'Coverage (expected ' + str(100 - alpha * 100) + '%)'
    if save_to_csv:
        outdir = './results/'
        if not os.path.exists(outdir):
            os.mkdir(outdir)
        out_name = outdir + 'results.csv'
        df = pd.DataFrame({
            'name': dataset_name_vec,
            'method': method_vec,
            coverage_str: coverage_vec,
            'Avg. Length': length_vec,
            'seed': seed_vec,
            'train test ratio': test_ratio_vec
        })
        # append to any existing results file rather than overwriting it
        if os.path.isfile(out_name):
            df2 = pd.read_csv(out_name)
            df = pd.concat([df2, df], ignore_index=True)
        df.to_csv(out_name, index=False)
def experiment(params):
    """Estimate prediction intervals and print the average length and coverage.

    Parameters
    ----------
    params : a dictionary with the following fields
        'data'   : string, name of dataset
        'method' : string, black-box regression method, either
                   'cqr_quantile_forest' or 'cqr_quantile_net'
        'level'  : string, nominal level for black-box (either 'fixed' or 'cv')
        'ratio'  : numeric, percentage of data used for training
        'seed'   : random seed
    """
    # Extract main parameters
    dataset_name = params["data"]
    method = params["method"]
    level = params["level"]
    ratio_train = params["ratio"]
    seed = params["seed"]

    # Determines the size of test set
    test_ratio = 0.2
    # conformal prediction miscoverage level
    significance = 0.1
    # Quantiles
    quantiles = [0.05, 0.95]
    # Quantiles for training
    if level == "cv":
        quantiles_net = [0.1, 0.5, 0.9]
    else:
        quantiles_net = [0.05, 0.5, 0.95]
    # List of conformalization methods
    conf_methods_list = ["CQR", "CQRm", "CQRr"]

    # Set random seeds
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Initialize cuda if available
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # Load the data. Catch Exception (not a bare except) so that e.g.
    # KeyboardInterrupt still propagates.
    try:
        X, y = datasets.GetDataset(dataset_name, base_dataset_path)
        print("Loaded dataset '" + dataset_name + "'.")
        sys.stdout.flush()
    except Exception:
        print("Error: cannot load dataset " + dataset_name)
        return

    # Dataset is divided into test and train data based on test_ratio parameter
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=test_ratio,
                                                        random_state=seed)

    # Reshape the data
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test)
    y_test = np.asarray(y_test)
    n_train = X_train.shape[0]

    # Print input dimensions
    print(
        "Data size: train (%d, %d), test (%d, %d)" %
        (X_train.shape[0], X_train.shape[1], X_test.shape[0], X_test.shape[1]))
    sys.stdout.flush()

    # Set seed for splitting the data into proper train and calibration
    np.random.seed(seed)
    idx = np.random.permutation(n_train)

    # Divide the data into proper training set and calibration set
    n_half = int(np.floor(n_train * ratio_train / 100.0))
    idx_train, idx_cal = idx[:n_half], idx[n_half:n_train]

    # Zero mean and unit variance scaling, fitted on the proper training
    # set only so no calibration/test information leaks into the scaler
    scalerX = StandardScaler()
    scalerX = scalerX.fit(X_train[idx_train])
    X_train = scalerX.transform(X_train)
    X_test = scalerX.transform(X_test)

    # Scale the labels by dividing each by the mean absolute response
    mean_ytrain = np.mean(np.abs(y_train[idx_train]))
    y_train = np.squeeze(y_train) / mean_ytrain
    y_test = np.squeeze(y_test) / mean_ytrain

    # Build the black-box regressor. A fresh dict is used for the model
    # hyper-parameters instead of rebinding (shadowing) the `params`
    # argument, as the original code did.
    if method == 'cqr_quantile_forest':
        # Parameters of the random forest
        qr_params = dict()
        qr_params['n_estimators'] = 1000
        qr_params['max_features'] = X_train.shape[1]
        qr_params['min_samples_leaf'] = 1
        qr_params['random_state'] = seed
        qr_params['n_jobs'] = 5
        qr_params['cv'] = (level == "cv")
        # Initialize random forest regressor
        model = RandomForestQR(qr_params, quantiles, verbose=verbose)
        # Initialize regressor for hyperparameter tuning
        model_tuning = RandomForestQR(qr_params, quantiles, verbose=verbose)
    elif method == 'cqr_quantile_net':
        # Parameters of the neural network
        qr_params = dict()
        qr_params['in_shape'] = X_train.shape[1]
        qr_params['epochs'] = 1000
        qr_params['lr'] = 0.0005
        qr_params['hidden_size'] = 64
        qr_params['batch_size'] = 64
        qr_params['dropout'] = 0.1
        qr_params['wd'] = 1e-6
        qr_params['test_ratio'] = 0.05
        qr_params['random_state'] = seed
        # Initialize neural network regressor
        model = NeuralNetworkQR(qr_params, quantiles_net, verbose=verbose)
        # Initialize regressor for hyperparameter tuning.
        # NOTE(review): this uses `quantiles`, not `quantiles_net`, as in
        # the original code — confirm that is intentional.
        model_tuning = NeuralNetworkQR(qr_params, quantiles, verbose=verbose)
    else:
        print("Unknown method.")
        sys.exit()

    # Conformalize the fitted quantile regressor
    cqr = ConformalizedQR(model, model_tuning, X_train, y_train, idx_train,
                          idx_cal, significance)

    for conf_method in conf_methods_list:
        # Compute CQR intervals
        lower, upper = cqr.predict(X_test,
                                   y_test,
                                   significance,
                                   method=conf_method)
        # Compute coverage and widths
        covered = (y_test >= lower) & (y_test <= upper)
        widths = upper - lower
        # Print update
        print(conf_method + ": " +
              "coverage %.3f, width %.3f" % (np.mean(covered), np.mean(widths)))
        sys.stdout.flush()
def run_experiment(dataset_name, test_method, random_state_train_test, save_to_csv=True):
    """ Estimate prediction intervals and print the average length and coverage

    Parameters
    ----------
    dataset_name : string, name of the dataset to load
    test_method : string, method to be tested, estimating
                  the 90% prediction interval (e.g. 'linear', 'neural_net',
                  'random_forest', 'quantile_net', 'cqr_quantile_net', ...)
    random_state_train_test : integer, random seed to be used
    save_to_csv : boolean, save average length and coverage to csv (True)
                  or not (False)

    NOTE(review): relies on module-level names `plot_results` and
    `base_dataset_path`, defined elsewhere in this file.
    """
    # per-method accumulators, written to results.csv at the end
    dataset_name_vec = []
    method_vec = []
    coverage_vec = []
    length_vec = []
    seed_vec = []

    # seed every source of randomness for reproducibility
    seed = random_state_train_test
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # default (zero) statistics for every method; only the branch matching
    # test_method overwrites its pair, the rest stay 0 in the summary table
    coverage_linear = 0
    length_linear = 0
    coverage_linear_local = 0
    length_linear_local = 0
    coverage_net = 0
    length_net = 0
    coverage_net_local = 0
    length_net_local = 0
    coverage_forest = 0
    length_forest = 0
    coverage_forest_local = 0
    length_forest_local = 0
    coverage_cp_qnet = 0
    length_cp_qnet = 0
    coverage_qnet = 0
    length_qnet = 0
    coverage_cp_sign_qnet = 0
    length_cp_sign_qnet = 0
    coverage_cp_re_qnet = 0
    length_cp_re_qnet = 0
    coverage_re_qnet = 0
    length_re_qnet = 0
    coverage_cp_sign_re_qnet = 0
    length_cp_sign_re_qnet = 0
    coverage_cp_qforest = 0
    length_cp_qforest = 0
    coverage_qforest = 0
    length_qforest = 0
    coverage_cp_sign_qforest = 0
    length_cp_sign_qforest = 0

    # determines the size of test set
    test_ratio = 0.2
    # conformal prediction miscoverage level
    significance = 0.1
    # desired quantile levels, used by the quantile regression methods
    quantiles = [0.05, 0.95]

    # Random forests parameters (shared by conditional quantile random forests
    # and conditional mean random forests regression).
    n_estimators = 1000  # usual random forests n_estimators parameter
    min_samples_leaf = 1  # default parameter of sklearn

    # Quantile random forests parameters.
    # See QuantileForestRegressorAdapter class for more details
    quantiles_forest = [5, 95]
    CV_qforest = True
    coverage_factor = 0.85
    cv_test_ratio = 0.05
    cv_random_state = 1
    cv_range_vals = 30
    cv_num_vals = 10

    # Neural network parameters (shared by conditional quantile neural network
    # and conditional mean neural network regression)
    # See AllQNet_RegressorAdapter and MSENet_RegressorAdapter in helper.py
    nn_learn_func = torch.optim.Adam
    epochs = 1000
    lr = 0.0005
    hidden_size = 64
    batch_size = 64
    dropout = 0.1
    wd = 1e-6

    # Ask for a reduced coverage when tuning the network parameters by
    # cross-validation to avoid too conservative initial estimation of the
    # prediction interval. This estimation will be conformalized by CQR.
    quantiles_net = [0.1, 0.9]

    # local conformal prediction parameter.
    # See RegressorNc class for more details.
    beta = 1
    beta_net = 1

    # local conformal prediction parameter. The local ridge regression method
    # uses nearest neighbor regression as the MAD estimator.
    # Number of neighbors used by nearest neighbor regression.
    n_neighbors = 11

    print(dataset_name)
    sys.stdout.flush()

    try:
        # load the dataset
        X, y = datasets.GetDataset(dataset_name, base_dataset_path)
    except:
        # NOTE(review): bare except also swallows KeyboardInterrupt etc.
        print("CANNOT LOAD DATASET!")
        return

    # Dataset is divided into test and train data based on test_ratio parameter
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, random_state=random_state_train_test)

    # zero mean and unit variance scaling of the train and test features.
    # NOTE(review): unlike the other experiment drivers, the scaler here is
    # fitted on the FULL training set (including the calibration half).
    scalerX = StandardScaler()
    scalerX = scalerX.fit(X_train)
    X_train = scalerX.transform(X_train)
    X_test = scalerX.transform(X_test)

    # scale the labels by dividing each by the mean absolute response.
    # NOTE(review): despite the name, max_ytrain holds the MEAN absolute
    # response, not the maximum.
    max_ytrain = np.mean(np.abs(y_train))
    y_train = y_train / max_ytrain
    y_test = y_test / max_ytrain

    # fit a simple ridge regression model (sanity check)
    model = linear_model.RidgeCV()
    model = model.fit(X_train, y_train)
    predicted_data = model.predict(X_test).astype(np.float32)

    # calculate the normalized mean squared error
    print("Ridge relative error: %f" %
          (np.sum((y_test - predicted_data)**2) / np.sum(y_test**2)))
    sys.stdout.flush()

    # reshape the data
    X_train = np.asarray(X_train)
    y_train = np.squeeze(np.asarray(y_train))
    X_test = np.asarray(X_test)
    y_test = np.squeeze(np.asarray(y_test))

    # input dimensions
    n_train = X_train.shape[0]
    in_shape = X_train.shape[1]

    print("Size: train (%d, %d), test (%d, %d)" %
          (X_train.shape[0], X_train.shape[1], X_test.shape[0], X_test.shape[1]))
    sys.stdout.flush()

    # set seed for splitting the data into proper train and calibration
    np.random.seed(seed)
    idx = np.random.permutation(n_train)

    # divide the data into proper training set and calibration set
    n_half = int(np.floor(n_train / 2))
    idx_train, idx_cal = idx[:n_half], idx[n_half:2 * n_half]

    ######################## Linear
    if 'linear' == test_method:
        # split conformal prediction with a ridge regression black box
        model = linear_model.RidgeCV()
        nc = RegressorNc(model)
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test,
                                          idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper, "Ridge")
        coverage_linear, length_linear = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "Ridge")
        dataset_name_vec.append(dataset_name)
        method_vec.append('Ridge')
        coverage_vec.append(coverage_linear)
        length_vec.append(length_linear)
        seed_vec.append(seed)

        # locally-adaptive variant: k-NN regression estimates the local MAD
        nc = NcFactory.create_nc(
            linear_model.RidgeCV(),
            normalizer_model=KNeighborsRegressor(n_neighbors=n_neighbors))
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test,
                                          idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper, "Ridge-L")
        coverage_linear_local, length_linear_local = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "Ridge-L")
        dataset_name_vec.append(dataset_name)
        method_vec.append('Ridge-L')
        coverage_vec.append(coverage_linear_local)
        length_vec.append(length_linear_local)
        seed_vec.append(seed)

    ######################### Neural net
    if 'neural_net' == test_method:
        # split conformal prediction with a mean-regression network black box
        model = helper.MSENet_RegressorAdapter(model=None,
                                               fit_params=None,
                                               in_shape=in_shape,
                                               hidden_size=hidden_size,
                                               learn_func=nn_learn_func,
                                               epochs=epochs,
                                               batch_size=batch_size,
                                               dropout=dropout,
                                               lr=lr,
                                               wd=wd,
                                               test_ratio=cv_test_ratio,
                                               random_state=cv_random_state)
        nc = RegressorNc(model)
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test,
                                          idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper, "Net")
        coverage_net, length_net = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "Net")
        dataset_name_vec.append(dataset_name)
        method_vec.append('Net')
        coverage_vec.append(coverage_net)
        length_vec.append(length_net)
        seed_vec.append(seed)

        # locally-adaptive variant: a second network estimates the local MAD
        normalizer_adapter = helper.MSENet_RegressorAdapter(
            model=None,
            fit_params=None,
            in_shape=in_shape,
            hidden_size=hidden_size,
            learn_func=nn_learn_func,
            epochs=epochs,
            batch_size=batch_size,
            dropout=dropout,
            lr=lr,
            wd=wd,
            test_ratio=cv_test_ratio,
            random_state=cv_random_state)
        adapter = helper.MSENet_RegressorAdapter(model=None,
                                                 fit_params=None,
                                                 in_shape=in_shape,
                                                 hidden_size=hidden_size,
                                                 learn_func=nn_learn_func,
                                                 epochs=epochs,
                                                 batch_size=batch_size,
                                                 dropout=dropout,
                                                 lr=lr,
                                                 wd=wd,
                                                 test_ratio=cv_test_ratio,
                                                 random_state=cv_random_state)
        normalizer = RegressorNormalizer(adapter, normalizer_adapter,
                                         AbsErrorErrFunc())
        nc = RegressorNc(adapter, AbsErrorErrFunc(), normalizer, beta=beta_net)
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test,
                                          idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper, "Net-L")
        coverage_net_local, length_net_local = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "Net-L")
        dataset_name_vec.append(dataset_name)
        method_vec.append('Net-L')
        coverage_vec.append(coverage_net_local)
        length_vec.append(length_net_local)
        seed_vec.append(seed)

    ################## Random Forest
    if 'random_forest' == test_method:
        # split conformal prediction with a mean random forest black box
        model = RandomForestRegressor(n_estimators=n_estimators,
                                      min_samples_leaf=min_samples_leaf,
                                      random_state=0)
        nc = RegressorNc(model, AbsErrorErrFunc())
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test,
                                          idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper, "RF")
        coverage_forest, length_forest = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "RF")
        dataset_name_vec.append(dataset_name)
        method_vec.append('RF')
        coverage_vec.append(coverage_forest)
        length_vec.append(length_forest)
        seed_vec.append(seed)

        # locally-adaptive variant: a second forest estimates the local MAD
        normalizer_adapter = RandomForestRegressor(
            n_estimators=n_estimators,
            min_samples_leaf=min_samples_leaf,
            random_state=0)
        adapter = RandomForestRegressor(n_estimators=n_estimators,
                                        min_samples_leaf=min_samples_leaf,
                                        random_state=0)
        normalizer = RegressorNormalizer(adapter, normalizer_adapter,
                                         AbsErrorErrFunc())
        nc = RegressorNc(adapter, AbsErrorErrFunc(), normalizer, beta=beta)
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test,
                                          idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper, "RF-L")
        coverage_forest_local, length_forest_local = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "RF-L")
        dataset_name_vec.append(dataset_name)
        method_vec.append('RF-L')
        coverage_vec.append(coverage_forest_local)
        length_vec.append(length_forest_local)
        seed_vec.append(seed)

    ################## Quantile Net
    if 'quantile_net' == test_method:
        # plain quantile network, no conformalization
        model_full = helper.AllQNet_RegressorAdapter(
            model=None,
            fit_params=None,
            in_shape=in_shape,
            hidden_size=hidden_size,
            quantiles=quantiles,
            learn_func=nn_learn_func,
            epochs=epochs,
            batch_size=batch_size,
            dropout=dropout,
            lr=lr,
            wd=wd,
            test_ratio=cv_test_ratio,
            random_state=cv_random_state,
            use_rearrangement=False)
        model_full.fit(X_train, y_train)
        tmp = model_full.predict(X_test)
        y_lower = tmp[:, 0]
        y_upper = tmp[:, 1]
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper, "QNet")
        coverage_qnet, length_qnet = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "QNet")
        dataset_name_vec.append(dataset_name)
        method_vec.append('QNet')
        coverage_vec.append(coverage_qnet)
        length_vec.append(length_qnet)
        seed_vec.append(seed)

    if 'cqr_quantile_net' == test_method:
        # CQR with a quantile network black box (symmetric error function)
        model = helper.AllQNet_RegressorAdapter(model=None,
                                                fit_params=None,
                                                in_shape=in_shape,
                                                hidden_size=hidden_size,
                                                quantiles=quantiles_net,
                                                learn_func=nn_learn_func,
                                                epochs=epochs,
                                                batch_size=batch_size,
                                                dropout=dropout,
                                                lr=lr,
                                                wd=wd,
                                                test_ratio=cv_test_ratio,
                                                random_state=cv_random_state,
                                                use_rearrangement=False)
        nc = RegressorNc(model, QuantileRegErrFunc())
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test,
                                          idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper, "CQR Net")
        coverage_cp_qnet, length_cp_qnet = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "CQR Net")
        dataset_name_vec.append(dataset_name)
        method_vec.append('CQR Net')
        coverage_vec.append(coverage_cp_qnet)
        length_vec.append(length_cp_qnet)
        seed_vec.append(seed)

    if 'cqr_asymmetric_quantile_net' == test_method:
        # CQR with a quantile network black box (asymmetric error function)
        model = helper.AllQNet_RegressorAdapter(model=None,
                                                fit_params=None,
                                                in_shape=in_shape,
                                                hidden_size=hidden_size,
                                                quantiles=quantiles_net,
                                                learn_func=nn_learn_func,
                                                epochs=epochs,
                                                batch_size=batch_size,
                                                dropout=dropout,
                                                lr=lr,
                                                wd=wd,
                                                test_ratio=cv_test_ratio,
                                                random_state=cv_random_state,
                                                use_rearrangement=False)
        nc = RegressorNc(model, QuantileRegAsymmetricErrFunc())
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test,
                                          idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper, "CQR Sign Net")
        coverage_cp_sign_qnet, length_cp_sign_qnet = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "CQR Sign Net")
        dataset_name_vec.append(dataset_name)
        method_vec.append('CQR Sign Net')
        coverage_vec.append(coverage_cp_sign_qnet)
        length_vec.append(length_cp_sign_qnet)
        seed_vec.append(seed)

    ################### Rearrangement Quantile Net
    if 'rearrangement' == test_method:
        # quantile network with monotone rearrangement, no conformalization
        model_full = helper.AllQNet_RegressorAdapter(
            model=None,
            fit_params=None,
            in_shape=in_shape,
            hidden_size=hidden_size,
            quantiles=quantiles,
            learn_func=nn_learn_func,
            epochs=epochs,
            batch_size=batch_size,
            dropout=dropout,
            lr=lr,
            wd=wd,
            test_ratio=cv_test_ratio,
            random_state=cv_random_state,
            use_rearrangement=True)
        model_full.fit(X_train, y_train)
        tmp = model_full.predict(X_test)
        y_lower = tmp[:, 0]
        y_upper = tmp[:, 1]
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper, "Rearrange QNet")
        coverage_re_qnet, length_re_qnet = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "Rearrange QNet")
        dataset_name_vec.append(dataset_name)
        method_vec.append('Rearrange QNet')
        coverage_vec.append(coverage_re_qnet)
        length_vec.append(length_re_qnet)
        seed_vec.append(seed)

    if 'cqr_rearrangement' == test_method:
        # CQR with a rearranged quantile network (symmetric error function)
        model = helper.AllQNet_RegressorAdapter(model=None,
                                                fit_params=None,
                                                in_shape=in_shape,
                                                hidden_size=hidden_size,
                                                quantiles=quantiles_net,
                                                learn_func=nn_learn_func,
                                                epochs=epochs,
                                                batch_size=batch_size,
                                                dropout=dropout,
                                                lr=lr,
                                                wd=wd,
                                                test_ratio=cv_test_ratio,
                                                random_state=cv_random_state,
                                                use_rearrangement=True)
        nc = RegressorNc(model, QuantileRegErrFunc())
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test,
                                          idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper,
                                  "Rearrange CQR Net")
        coverage_cp_re_qnet, length_cp_re_qnet = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "Rearrange CQR Net")
        dataset_name_vec.append(dataset_name)
        method_vec.append('Rearrange CQR Net')
        coverage_vec.append(coverage_cp_re_qnet)
        length_vec.append(length_cp_re_qnet)
        seed_vec.append(seed)

    if 'cqr_asymmetric_rearrangement' == test_method:
        # CQR with a rearranged quantile network (asymmetric error function)
        model = helper.AllQNet_RegressorAdapter(model=None,
                                                fit_params=None,
                                                in_shape=in_shape,
                                                hidden_size=hidden_size,
                                                quantiles=quantiles_net,
                                                learn_func=nn_learn_func,
                                                epochs=epochs,
                                                batch_size=batch_size,
                                                dropout=dropout,
                                                lr=lr,
                                                wd=wd,
                                                test_ratio=cv_test_ratio,
                                                random_state=cv_random_state,
                                                use_rearrangement=True)
        nc = RegressorNc(model, QuantileRegAsymmetricErrFunc())
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test,
                                          idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper,
                                  "Rearrange CQR Sign Net")
        # NOTE(review): the display label below says "Rearrange CQR Net",
        # inconsistent with the 'Rearrange CQR Sign Net' method name used
        # two lines down — likely a copy/paste slip in the printed label.
        coverage_cp_sign_re_qnet, length_cp_sign_re_qnet = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "Rearrange CQR Net")
        dataset_name_vec.append(dataset_name)
        method_vec.append('Rearrange CQR Sign Net')
        coverage_vec.append(coverage_cp_sign_re_qnet)
        length_vec.append(length_cp_sign_re_qnet)
        seed_vec.append(seed)

    ################### Quantile Random Forest
    if 'quantile_forest' == test_method:
        # plain quantile random forest, no conformalization
        params_qforest = dict()
        params_qforest["random_state"] = 0
        params_qforest["min_samples_leaf"] = min_samples_leaf
        params_qforest["n_estimators"] = n_estimators
        params_qforest["max_features"] = X_train.shape[1]
        params_qforest["CV"] = False
        params_qforest["coverage_factor"] = coverage_factor
        params_qforest["test_ratio"] = cv_test_ratio
        # NOTE(review): this overwrites the random_state=0 set above
        params_qforest["random_state"] = cv_random_state
        params_qforest["range_vals"] = cv_range_vals
        params_qforest["num_vals"] = cv_num_vals
        model_full = helper.QuantileForestRegressorAdapter(
            model=None,
            fit_params=None,
            quantiles=np.dot(100, quantiles),
            params=params_qforest)
        model_full.fit(X_train, y_train)
        tmp = model_full.predict(X_test)
        y_lower = tmp[:, 0]
        y_upper = tmp[:, 1]
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper, "QRF")
        coverage_qforest, length_qforest = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "QRF")
        dataset_name_vec.append(dataset_name)
        method_vec.append('QRF')
        coverage_vec.append(coverage_qforest)
        length_vec.append(length_qforest)
        seed_vec.append(seed)

    if 'cqr_quantile_forest' == test_method:
        # CQR with a quantile random forest (symmetric error function)
        params_qforest = dict()
        params_qforest["random_state"] = 0
        params_qforest["min_samples_leaf"] = min_samples_leaf
        params_qforest["n_estimators"] = n_estimators
        params_qforest["max_features"] = X_train.shape[1]
        params_qforest["CV"] = CV_qforest
        params_qforest["coverage_factor"] = coverage_factor
        params_qforest["test_ratio"] = cv_test_ratio
        # NOTE(review): this overwrites the random_state=0 set above
        params_qforest["random_state"] = cv_random_state
        params_qforest["range_vals"] = cv_range_vals
        params_qforest["num_vals"] = cv_num_vals
        model = helper.QuantileForestRegressorAdapter(
            model=None,
            fit_params=None,
            quantiles=quantiles_forest,
            params=params_qforest)
        nc = RegressorNc(model, QuantileRegErrFunc())
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test,
                                          idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper, "CQR RF")
        coverage_cp_qforest, length_cp_qforest = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "CQR RF")
        dataset_name_vec.append(dataset_name)
        method_vec.append('CQR RF')
        coverage_vec.append(coverage_cp_qforest)
        length_vec.append(length_cp_qforest)
        seed_vec.append(seed)

    if 'cqr_asymmetric_quantile_forest' == test_method:
        # CQR with a quantile random forest (asymmetric error function)
        params_qforest = dict()
        params_qforest["random_state"] = 0
        params_qforest["min_samples_leaf"] = min_samples_leaf
        params_qforest["n_estimators"] = n_estimators
        params_qforest["max_features"] = X_train.shape[1]
        params_qforest["CV"] = CV_qforest
        params_qforest["coverage_factor"] = coverage_factor
        params_qforest["test_ratio"] = cv_test_ratio
        # NOTE(review): this overwrites the random_state=0 set above
        params_qforest["random_state"] = cv_random_state
        params_qforest["range_vals"] = cv_range_vals
        params_qforest["num_vals"] = cv_num_vals
        model = helper.QuantileForestRegressorAdapter(
            model=None,
            fit_params=None,
            quantiles=quantiles_forest,
            params=params_qforest)
        nc = RegressorNc(model, QuantileRegAsymmetricErrFunc())
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test,
                                          idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test, y_lower, y_upper, "CQR Sign RF")
        coverage_cp_sign_qforest, length_cp_sign_qforest = helper.compute_coverage(
            y_test, y_lower, y_upper, significance, "CQR Sign RF")
        dataset_name_vec.append(dataset_name)
        method_vec.append('CQR Sign RF')
        coverage_vec.append(coverage_cp_sign_qforest)
        length_vec.append(length_cp_sign_qforest)
        seed_vec.append(seed)

    ############### Summary
    coverage_str = 'Coverage (expected ' + str(100 - significance * 100) + '%)'

    # full summary table: one row per known method, zeros where not run
    results = np.array(
        [[dataset_name, coverage_str, 'Avg. Length', 'Seed'],
         ['CP Linear', coverage_linear, length_linear, seed],
         ['CP Linear Local', coverage_linear_local, length_linear_local, seed],
         ['CP Neural Net', coverage_net, length_net, seed],
         ['CP Neural Net Local', coverage_net_local, length_net_local, seed],
         ['CP Random Forest', coverage_forest, length_forest, seed],
         ['CP Random Forest Local', coverage_forest_local,
          length_forest_local, seed],
         ['CP Quantile Net', coverage_cp_qnet, length_cp_qnet, seed],
         ['CP Asymmetric Quantile Net', coverage_cp_sign_qnet,
          length_cp_sign_qnet, seed],
         ['Quantile Net', coverage_qnet, length_qnet, seed],
         ['CP Rearrange Quantile Net', coverage_cp_re_qnet,
          length_cp_re_qnet, seed],
         ['CP Asymmetric Rearrange Quantile Net', coverage_cp_sign_re_qnet,
          length_cp_sign_re_qnet, seed],
         ['Rearrange Quantile Net', coverage_re_qnet, length_re_qnet, seed],
         ['CP Quantile Random Forest', coverage_cp_qforest,
          length_cp_qforest, seed],
         ['CP Asymmetric Quantile Random Forest', coverage_cp_sign_qforest,
          length_cp_sign_qforest, seed],
         ['Quantile Random Forest', coverage_qforest, length_qforest, seed]])

    # first row becomes the column headers, first column the index
    results_ = pd.DataFrame(data=results[1:, 1:],
                            index=results[1:, 0],
                            columns=results[0, 1:])

    print("== SUMMARY == ")
    print("dataset name: " + dataset_name)
    print(results_)
    sys.stdout.flush()

    if save_to_csv:
        # NOTE(review): this DataFrame is never used afterwards — the CSV is
        # built from the accumulator lists below instead.
        results = pd.DataFrame(results)
        outdir = './results/'
        if not os.path.exists(outdir):
            os.mkdir(outdir)
        out_name = outdir + 'results.csv'
        df = pd.DataFrame({
            'name': dataset_name_vec,
            'method': method_vec,
            coverage_str: coverage_vec,
            'Avg. Length': length_vec,
            'seed': seed_vec
        })
        # append to any existing results file rather than overwriting it
        if os.path.isfile(out_name):
            df2 = pd.read_csv(out_name)
            df = pd.concat([df2, df], ignore_index=True)
        df.to_csv(out_name, index=False)