def setUp(self):
    """Fit Cobra, Ewa and KernelCobra on a seeded synthetic regression set.

    800 samples with 20 features are drawn from ``uniform(-1, 1)``. The
    first 400 rows are used for fitting; the last 200 rows are stored as
    ``self.test_data``.
    """
    # Four consecutive row blocks: D1 = train machines, D2 = create COBRA,
    # D3 = calibrate epsilon/alpha, D4 = testing.
    block_sizes = (200, 200, 200, 200)
    n_features = 20
    n_samples = sum(block_sizes)
    n_fit = block_sizes[0] + block_sizes[1]
    held_out_start = n_fit + block_sizes[2]
    held_out_stop = held_out_start + block_sizes[3]

    generator = np.random.RandomState(42)
    features = generator.uniform(-1, 1, n_samples * n_features).reshape(n_samples, n_features)
    response = (
        np.power(features[:, 1], 2)
        + np.power(features[:, 3], 3)
        + np.exp(features[:, 10])
    )

    fit_features = features[:n_fit]
    fit_response = response[:n_fit]
    held_out_features = features[held_out_start:held_out_stop]

    cobra_model = Cobra(random_state=0, epsilon=0.5)
    cobra_model.fit(fit_features, fit_response)

    ewa_model = Ewa(random_state=0)
    ewa_model.fit(fit_features, fit_response)

    kernel_model = KernelCobra(random_state=0)
    kernel_model.fit(fit_features, fit_response)

    self.test_data = held_out_features
    self.cobra = cobra_model
    self.ewa = ewa_model
    self.kernelcobra = kernel_model
def setUp(self):
    """Fit Cobra and Ewa on seeded synthetic data and build their visualisers.

    The first 400 of 800 generated rows are used for fitting. The last 200
    rows and their responses are stored as ``self.test_data`` and
    ``self.test_response``; the first four of those rows feed the
    ``Visualisation`` objects and the ``indice_info`` call.
    """
    # Four consecutive row blocks: D1 = train machines, D2 = create COBRA,
    # D3 = calibrate epsilon/alpha, D4 = testing.
    block_sizes = (200, 200, 200, 200)
    n_features = 20
    n_samples = sum(block_sizes)
    n_fit = block_sizes[0] + block_sizes[1]
    held_out = slice(n_fit + block_sizes[2], n_samples)
    first_four = slice(0, 4)

    generator = np.random.RandomState(42)
    features = generator.uniform(-1, 1, n_samples * n_features).reshape(n_samples, n_features)
    response = (
        np.power(features[:, 1], 2)
        + np.power(features[:, 3], 3)
        + np.exp(features[:, 10])
    )
    fit_features, fit_response = features[:n_fit], response[:n_fit]

    cobra_model = Cobra(random_state=0, epsilon=0.5)
    cobra_model.fit(fit_features, fit_response)

    self.test_data = features[held_out]
    self.test_response = response[held_out]
    self.cobra = cobra_model
    self.cobra_vis = Visualisation(
        self.cobra, self.test_data[first_four], self.test_response[first_four]
    )
    self.indices, self.mse = self.cobra_vis.indice_info(
        self.test_data[first_four],
        self.test_response[first_four],
        epsilon=self.cobra.epsilon,
    )

    ewa_model = Ewa(random_state=0)
    ewa_model.fit(fit_features, fit_response)
    self.ewa = ewa_model
    self.ewa_vis = Visualisation(
        self.ewa, self.test_data[first_four], self.test_response[first_four]
    )
def optimal_alpha_grid(self, X, y, line_points=200, info=False):
    """
    Grid-search epsilon and alpha for a single query point of the COBRA predictor.

    Parameters
    ----------
    X: array-like, [n_features]
        Query vector; it is reshaped to a single row before predicting.
    y: float
        Target value the prediction is compared against.
    line_points: integer, optional
        Number of epsilon values in the grid.
    info: bool, optional
        If True, return the full error dictionary instead of the optimum.

    Returns
    -------
    MSE: dictionary mapping (alpha, epsilon) to the squared error (when ``info`` is True)
    opt: otherwise the (alpha, epsilon) key with the smallest error, and that error
    """
    ordered = sorted(self.aggregate.all_predictions_)
    # The epsilon grid runs from the smallest gap between neighbouring
    # predictions up to the full spread of the predictions.
    gaps = [upper - lower for lower, upper in zip(ordered, ordered[1:])]
    smallest_gap = min(gaps)
    spread = max(ordered) - min(ordered)
    epsilon_grid = np.linspace(smallest_gap, spread, line_points)
    alpha_grid = np.arange(1, len(self.aggregate.estimators_) + 1)

    errors = {}
    for epsilon in epsilon_grid:
        for alpha in alpha_grid:
            model = Cobra(random_state=self.random_state, epsilon=epsilon)
            model.fit(self.aggregate.X_, self.aggregate.y_)
            prediction = model.predict(X.reshape(1, -1), alpha=alpha)
            errors[(alpha, epsilon)] = np.square(y - prediction)

    if not info:
        best = min(errors, key=errors.get)
        return best, errors[best]
    return errors
def optimal_epsilon(self, X, y, line_points=200, info=False):
    """
    Search a grid of epsilon values for the one with the lowest mean squared error.

    Parameters
    ----------
    X: array-like
        Data passed to ``predict`` of each candidate COBRA machine.
    y: array-like
        Target values compared with the predictions.
    line_points: integer, optional
        Number of epsilon values in the grid.
    info: bool, optional
        If True, return the full error dictionary instead of the optimum.

    Returns
    -------
    MSE: dictionary mapping epsilon to its mean squared error (when ``info`` is True)
    opt: otherwise the epsilon with the smallest error, and that error
    """
    ordered = sorted(self.aggregate.all_predictions_)
    # Grid bounds: smallest gap between neighbouring predictions and the
    # full spread of the predictions.
    gaps = [upper - lower for lower, upper in zip(ordered, ordered[1:])]
    smallest_gap = min(gaps)
    spread = max(ordered) - min(ordered)

    errors = {}
    for epsilon in np.linspace(smallest_gap, spread, line_points):
        model = Cobra(random_state=self.random_state, epsilon=epsilon)
        model.fit(self.aggregate.X_, self.aggregate.y_)
        errors[epsilon] = mean_squared_error(y, model.predict(X))

    if not info:
        best = min(errors, key=errors.get)
        return best, errors[best]
    return errors
def set_up():
    """Return a ``Visualisation`` of a Cobra model fitted on seeded synthetic data.

    The model is fitted on the first 400 of 800 generated rows; the
    visualiser receives the first four rows of the final 200-row block.
    """
    # Four consecutive row blocks: D1 = train machines, D2 = create COBRA,
    # D3 = calibrate epsilon/alpha, D4 = testing.
    block_sizes = (200, 200, 200, 200)
    n_features = 20
    n_samples = sum(block_sizes)
    n_fit = block_sizes[0] + block_sizes[1]
    held_out = slice(n_fit + block_sizes[2], n_samples)

    generator = np.random.RandomState(42)
    features = generator.uniform(-1, 1, n_samples * n_features).reshape(n_samples, n_features)
    response = (
        np.power(features[:, 1], 2)
        + np.power(features[:, 3], 3)
        + np.exp(features[:, 10])
    )

    model = Cobra(random_state=0, epsilon=0.5)
    model.fit(features[:n_fit], response[:n_fit])

    held_out_features = features[held_out]
    held_out_response = response[held_out]
    return Visualisation(model, held_out_features[0:4], held_out_response[0:4])
def optimal_alpha(self, X, y, single=False, epsilon=None, info=False):
    """
    Find the alpha with the lowest error for the COBRA predictor.

    Every alpha from 1 to the number of estimators of the aggregate is
    tried with a freshly fitted Cobra machine.

    Parameters
    ----------
    X: array-like
        Data passed to ``predict``.
    y: float or array-like
        Target value(s) compared with the prediction.
    single: boolean, optional
        If True the error is the squared difference ``(y - prediction) ** 2``;
        otherwise it is the mean squared error.
    epsilon: float, optional
        Fixed epsilon; defaults to the epsilon of the aggregate.
    info: bool, optional
        If True, return the full error dictionary instead of the optimum.

    Returns
    -------
    MSE: dictionary mapping alpha to its error (when ``info`` is True)
    opt: otherwise the alpha with the smallest error, and that error
    """
    if epsilon is None:
        epsilon = self.aggregate.epsilon

    highest_alpha = len(self.aggregate.estimators_)
    errors = {}
    for alpha in range(1, highest_alpha + 1):
        model = Cobra(random_state=self.random_state, epsilon=epsilon)
        model.fit(self.aggregate.X_, self.aggregate.y_)
        prediction = model.predict(X, alpha=alpha)
        if single:
            # single data point: plain squared error
            errors[alpha] = np.square(y - prediction)
        else:
            errors[alpha] = mean_squared_error(y, prediction)

    if not info:
        best = min(errors, key=errors.get)
        return best, errors[best]
    return errors
def define_cobra_model(train_names, training_noise_kind, patch_size=1, optimi=True, verbose=False):
    """
    Train a cobra model for denoising task

    INPUT :
    train_names : list containing the name of images used to train the model
    training_noise_kind : forwarded to load_training_data together with
                          train_names and patch_size
    patch_size : use patch of size (2*patch_size+1)*(2*patch_size+1) as features
    optimi : if True, look for epsilon and alpha with Diagnostics and train
             the model again with the values found
    verbose : print or not information during the training

    OUTPUT :
    (cobra, alpha, epsilon) : the trained model and the alpha / epsilon it
                              was built with (the optimised values when
                              optimi is True, the initial ones otherwise)
    """
    # Denoising machines in loading order; the position in this tuple is the
    # index handed to machine(). 'bm3d' (index 9) is deliberately not loaded.
    machine_names = (
        'bilateral',
        'nlmeans',
        'gauss',
        'median',
        'TVchambolle',
        'richardson_lucy',
        'inpainting',
        'ksvd',
        'lee',
    )

    def load_machines(model):
        """Register every denoising machine on *model*."""
        for index, name in enumerate(machine_names):
            model.load_machine(name, machine(name, index, patch_size))

    # initial cobra parameters
    alpha = 4      # how many machines must agree
    epsilon = 0.2  # confidence parameter

    print("Training cobra model...")
    Xtrain, Xtrain1, Xtrain2, Ytrain = load_training_data(
        train_names, training_noise_kind, patch_size)
    cobra = Cobra(epsilon=epsilon, machines=alpha)
    cobra.fit(Xtrain, Ytrain)

    print("Loading machines...")
    load_machines(cobra)

    print("Loading machine predictions...")
    cobra.load_machine_predictions()
    if verbose:
        print(cobra.machine_predictions_)

    if optimi:
        print("Parameter optimisation")
        cobra_diagnostics = Diagnostics(cobra, Xtrain, Ytrain)
        epsilon_opt, _ = cobra_diagnostics.optimal_epsilon(
            Xtrain, Ytrain, line_points=100, info=False)
        alpha_opt, _ = cobra_diagnostics.optimal_alpha(
            Xtrain, Ytrain, epsilon=epsilon_opt, info=False)
        if verbose:
            print("epsilon = ", epsilon_opt)
            print("alpha = ", alpha_opt)

        print("Training cobra model again...")
        cobra = Cobra(epsilon=epsilon_opt, machines=alpha_opt)
        cobra.fit(Xtrain, Ytrain, default=False, X_k=Xtrain1, X_l=Xtrain2,
                  y_k=Ytrain, y_l=Ytrain)
        load_machines(cobra)
        cobra.load_machine_predictions()
        if verbose:
            print("Loading machine predictions...")
            print(cobra.machine_predictions_)

        # Report the parameters the returned model was actually built with,
        # not the initial guesses.
        alpha, epsilon = alpha_opt, epsilon_opt

    return (cobra, alpha, epsilon)
def boxplot(self, reps=100, info=False):
    """
    Plots boxplots of machines.

    For each repetition a fresh aggregate of the same type as
    ``self.aggregate`` is fitted on shuffled training data, the machines of
    ``self.aggregate`` are refitted on its ``X_k_``/``y_k_`` split, and the
    error of every machine and of the aggregate on the shuffled test data
    is recorded. Mean squared error is used for Cobra and Ewa, and
    ``1 - accuracy`` for ClassifierCobra.

    Parameters
    ----------
    reps: int, optional
        Number of times to repeat experiments for boxplot.
    info: boolean, optional
        Returns data

    Returns
    -------
    data: list with one list of errors per machine, followed by the
        aggregate's own errors (only when ``info`` is True)

    Raises
    ------
    TypeError
        If ``self.aggregate`` is not a Cobra, Ewa or ClassifierCobra.
    """
    aggregate = self.aggregate
    aggregate_type = type(aggregate)

    def misclassification(truth, predicted):
        return 1 - accuracy_score(truth, predicted)

    # Per aggregate type: label of the aggregate's column, how to build a
    # fresh aggregate, how to finish it once the machines are loaded, and
    # the error measure.
    if aggregate_type is Cobra:
        label = "COBRA"
        error = mean_squared_error

        def build():
            return Cobra(random_state=self.random_state, epsilon=aggregate.epsilon)

        def finish(model):
            model.load_machine_predictions()
    elif aggregate_type is Ewa:
        label = "EWA"
        error = mean_squared_error

        def build():
            return Ewa(random_state=self.random_state, beta=aggregate.beta)

        def finish(model):
            model.load_machine_weights(aggregate.beta)
    elif aggregate_type is ClassifierCobra:
        label = "ClassifierCobra"
        error = misclassification

        def build():
            return ClassifierCobra(random_state=self.random_state)

        def finish(model):
            model.load_machine_predictions()
    else:
        # Previously this fell through to an UnboundLocalError on `data`
        # after an empty figure had already been opened.
        raise TypeError(
            "boxplot is not supported for aggregate of type "
            + aggregate_type.__name__
        )

    errors = {name: [] for name in aggregate.machines_}
    errors[label] = []

    for _ in range(reps):
        model = build()
        X, y = shuffle(aggregate.X_, aggregate.y_, random_state=aggregate.random_state)
        model.fit(X, y, default=False)
        model.split_data(shuffle_data=True)

        for name in aggregate.machines_:
            aggregate.machines_[name].fit(model.X_k_, model.y_k_)
            model.load_machine(name, aggregate.machines_[name])
        finish(model)

        X_test, y_test = shuffle(self.X_test, self.y_test,
                                 random_state=aggregate.random_state)
        for name in model.machines_:
            errors[name].append(error(y_test, model.machines_[name].predict(X_test)))
        errors[label].append(error(y_test, model.predict(X_test)))

    labels = list(errors)
    data = [errors[name] for name in labels]

    plt.figure(figsize=(self.plot_size, self.plot_size))
    plt.boxplot(data, labels=labels)
    plt.show()

    if info:
        return data
# Total number of samples: the sum of the four data blocks D1..D4.
D = D1 + D2 + D3 + D4
X = rng.uniform(-1, 1, D * n_features).reshape(D, n_features)
# Alternative response kept for reference:
# Y = np.power(X[:,1], 2) + np.power(X[:,3], 3) + np.exp(X[:,10])
Y = np.power(X[:,0], 2) + np.power(X[:,1], 3)

# training data-set
X_train = X[:D1 + D2]
X_test = X[D1 + D2 + D3:D1 + D2 + D3 + D4]
X_eps = X[D1 + D2:D1 + D2 + D3]
# for testing
Y_train = Y[:D1 + D2]
Y_test = Y[D1 + D2 + D3:D1 + D2 + D3 + D4]
Y_eps = Y[D1 + D2:D1 + D2 + D3]

# set up our COBRA machine with the data
cobra = Cobra(epsilon=0.5)
cobra.fit(X_train, Y_train)

######################################################################
# Plotting COBRA
# ~~~~~~~~~~~~~~
#
# We use the visualisation class to plot our results, and for various
# visualisations.
#

cobra_vis = Visualisation(cobra, X_test, Y_test)

# To plot our machines we need a linspace as input. This is the 'scale' of
# the plot and should cover the range of the results. Since our data ranges
# from -1 to 1, so does the linspace - and we space it out to a hundred points.
X_eps = X[D1 + D2:D1 + D2 + D3] # for testing Y_train = Y[:D1 + D2] Y_test = Y[D1 + D2 + D3:D1 + D2 + D3 + D4] Y_eps = Y[D1 + D2:D1 + D2 + D3] ###################################################################### # Similar to other scikit-learn estimators, we set up our machine by # creating an object and then fitting it. Since we are not passing an # Epsilon value, we pass data to find an optimal epsilon value while # instantiating our object. The optimal epsilon is found through the # scikit-learn ``CVGridSearch``. The ``grid_points`` parameter decides how # many possible epsilon values must be traversed. # cobra = Cobra() cobra.set_epsilon(X_epsilon=X_eps, y_epsilon=Y_eps, grid_points=5) cobra.epsilon cobra.fit(X_train, Y_train) ###################################################################### # We now see if our object can fit into the scikit-learn pipeline and # GridSearch - and it can! # from sklearn.utils.estimator_checks import check_estimator # check_estimator(Cobra) #passes
def optimal_split(self, X, y, split=None, epsilon=None, info=False, graph=False):
    """
    Find the data split (D_k, D_l) with the lowest mean squared error for a
    fixed epsilon of the COBRA predictor.

    Parameters
    ----------
    X: array-like
        Data passed to ``predict`` of each candidate machine.
    y: array-like
        Target values compared with the predictions.
    split: list, optional
        (D_k, D_l) fractions to evaluate. For each pair the training data
        is split at ``int(k * n)`` and ``int((k + l) * n)``, where ``n`` is
        the number of training rows. Defaults to five splits from
        (0.20, 0.80) to (0.80, 0.20).
    epsilon: float, optional
        Fixed epsilon; defaults to the epsilon of the aggregate.
    info: bool, optional
        If True, return the full error dictionary instead of the optimum.
    graph: bool, optional
        If True, plot the error against the D_k fraction.

    Returns
    -------
    MSE: dictionary mapping split to its mean squared error (when ``info`` is True)
    opt: otherwise the split with the smallest error, and that error
    """
    if epsilon is None:
        epsilon = self.aggregate.epsilon
    if split is None:
        split = [(0.20, 0.80), (0.40, 0.60), (0.50, 0.50), (0.60, 0.40), (0.80, 0.20)]

    errors = {}
    for k, l in split:
        model = Cobra(random_state=self.random_state, epsilon=epsilon)
        model.fit(self.aggregate.X_, self.aggregate.y_, default=False)
        first_cut = int(k * len(self.aggregate.X_))
        second_cut = int((k + l) * len(self.aggregate.X_))
        model.split_data(first_cut, second_cut)
        model.load_default()
        model.load_machine_predictions()
        errors[(k, l)] = mean_squared_error(y, model.predict(X))

    if graph:
        import matplotlib.pyplot as plt
        fractions = [pair[0] for pair in split]
        heights = [errors[pair] for pair in split]
        plt.plot(fractions, heights)

    if not info:
        best = min(errors, key=errors.get)
        return best, errors[best]
    return errors
def optimal_machines(self, X, y, single=False, epsilon=None, info=False):
    """
    Find the combination of machines with the lowest error for the COBRA predictor.

    Every combination of every size drawn from the aggregate's estimators
    is tried with a freshly fitted Cobra machine.

    Parameters
    ----------
    X: array-like
        Query data; with ``single`` it is reshaped to one row before predicting.
    y: float or array-like
        Target value(s) compared with the prediction.
    single: boolean, optional
        If True the error is the squared difference for a single query
        point; otherwise it is the mean squared error.
    epsilon: float, optional
        Fixed epsilon; defaults to the epsilon of the aggregate.
    info: bool, optional
        If True, return the full error dictionary instead of the optimum.

    Returns
    -------
    MSE: dictionary mapping machine combinations to their error (when ``info`` is True)
    opt: otherwise the combination with the smallest error, and that error
    """
    if epsilon is None:
        epsilon = self.aggregate.epsilon

    sizes = np.arange(1, len(self.aggregate.estimators_) + 1)
    errors = {}
    for size in sizes:
        names = self.aggregate.estimators_.keys()
        for combination in itertools.combinations(names, size):
            model = Cobra(random_state=self.random_state, epsilon=epsilon)
            model.fit(self.aggregate.X_, self.aggregate.y_, default=False)
            model.split_data()
            model.load_default(machine_list=combination)
            model.load_machine_predictions()
            if single:
                prediction = model.predict(X.reshape(1, -1))
                errors[combination] = np.square(y - prediction)
            else:
                errors[combination] = mean_squared_error(y, model.predict(X))

    if not info:
        best = min(errors, key=errors.get)
        return best, errors[best]
    return errors
def cobraModelInit(trainNames, noiseType, imShape, patchSize=1, best=True):
    """
    Initialise and train a cobra model.

    A first model is built with epsilon 0.2 and 3 machines. When ``best``
    is true, Diagnostics is used to pick epsilon and the number of machines,
    and a second model is trained with those values. Returns the model
    together with the machines and epsilon values it was built with.
    ``imShape`` is not used by this function.
    """
    print("Making training data ready")
    trainingData, trainingData1, trainingData2, testingData = loadTrainingData(
        trainNames, noiseType, patchSize)
    denoisemethods = denoiseMethods()

    def attachMachines(model):
        # register one CobraMachine per denoising method, then aggregate
        for method in denoisemethods:
            model.load_machine(method, CobraMachine(method, patchSize))
        model.load_machine_predictions()

    epsilon, machines = 0.2, 3
    cobra = Cobra(epsilon=epsilon, machines=machines)
    print("Training model")
    cobra.fit(trainingData, testingData)
    attachMachines(cobra)

    if not best:
        return cobra, machines, epsilon

    print("Running Diagnostics")
    cobra_diagnostics = Diagnostics(cobra, trainingData, testingData, load_MSE=False)
    print("epsilon")
    epsilon, _ = cobra_diagnostics.optimal_epsilon(
        trainingData, testingData, line_points=100, info=False)
    print("machines")
    machines, _ = cobra_diagnostics.optimal_alpha(
        trainingData, testingData, epsilon=epsilon, info=False)

    cobra = Cobra(epsilon=epsilon, machines=machines)
    print("fit")
    cobra.fit(trainingData, testingData, default=False,
              X_k=trainingData1, X_l=trainingData2,
              y_k=testingData, y_l=testingData)
    attachMachines(cobra)
    return cobra, machines, epsilon
def optimal_machines_grid(self, X, y, line_points=200, info=False):
    """
    Grid-search epsilon and the machine combination for a single query point
    of the COBRA predictor.

    Parameters
    ----------
    X: array-like, [n_features]
        Query vector; it is reshaped to a single row before predicting.
    y: float
        Target value the prediction is compared against.
    line_points: integer, optional
        Number of epsilon values in the grid.
    info: bool, optional
        If True, return the full error dictionary instead of the optimum.

    Returns
    -------
    MSE: dictionary mapping (machine combination, epsilon) to the squared error (when ``info`` is True)
    opt: otherwise the (machine combination, epsilon) key with the smallest error, and that error
    """
    ordered = sorted(self.aggregate.all_predictions_)
    # The epsilon grid runs from the smallest gap between neighbouring
    # predictions up to the full spread of the predictions.
    gaps = [upper - lower for lower, upper in zip(ordered, ordered[1:])]
    smallest_gap = min(gaps)
    spread = max(ordered) - min(ordered)
    epsilon_grid = np.linspace(smallest_gap, spread, line_points)
    sizes = np.arange(1, len(self.aggregate.machines_) + 1)

    errors = {}
    for epsilon in epsilon_grid:
        for size in sizes:
            names = self.aggregate.machines_.keys()
            for combination in itertools.combinations(names, size):
                model = Cobra(random_state=self.random_state, epsilon=epsilon)
                model.fit(self.aggregate.X_, self.aggregate.y_, default=False)
                model.split_data()
                model.load_default(machine_list=combination)
                model.load_machine_predictions()
                prediction = model.predict(X.reshape(1, -1))
                errors[(combination, epsilon)] = np.square(y - prediction)

    if not info:
        best = min(errors, key=errors.get)
        return best, errors[best]
    return errors
def boxplot(self, reps=100, info=False, dataframe=None, kind="normal"):
    """
    Plots boxplots of machines.

    Unless ``dataframe`` is given, the experiment is repeated ``reps``
    times: a fresh aggregate of the same type as ``self.aggregate`` is
    fitted on shuffled training data, the aggregate's estimators are
    refitted on its ``X_k_``/``y_k_`` split, and the test error of every
    entry of ``self.estimators`` and of the aggregate itself is recorded.

    Parameters
    ----------
    reps: int, optional
        Number of times to repeat experiments for boxplot.
    info: boolean, optional
        Returns data
    dataframe: optional
        Pre-computed errors, passed straight to seaborn as ``data``. When
        given, no experiments are run.
    kind: string, optional
        Plot style: "normal" (boxplot), "violin" or "jitterplot" (boxplot
        with the data points overlaid).

    Returns
    -------
    dataframe: the plotted data, when ``info`` is True. If the collected
        errors cannot be turned into a DataFrame (ValueError), the raw
        error dictionary is returned instead and nothing is plotted.
    """
    kwargs = self.kwargs

    def predict(estimator, X):
        """Predict, forwarding the configured bandwidth to KernelCobra estimators."""
        if type(estimator) is KernelCobra and "bandwidth_kernel" in kwargs:
            return estimator.predict(X, bandwidth=kwargs["bandwidth_kernel"])
        return estimator.predict(X)

    def load_machines(model):
        """Refit the aggregate's estimators on model's D_k split and load them into model."""
        for name, estimator in self.aggregate.estimators_.items():
            estimator.fit(model.X_k_, model.y_k_)
            model.load_machine(name, estimator)

    def score_estimators(errors, tag, X, y, model, X_eval, y_eval):
        """Refit each comparison estimator and record its mean squared error."""
        for name, estimator in self.estimators.items():
            if tag in name:
                # aggregates (name contains the tag) are fitted on all the data
                estimator.fit(X, y)
            else:
                estimator.fit(model.X_k_, model.y_k_)
            errors[name].append(
                mean_squared_error(y_eval, predict(estimator, X_eval)))

    if dataframe is None:
        errors = None
        aggregate_type = type(self.aggregate)

        if aggregate_type is Cobra:
            errors = {name: [] for name in self.estimators}
            errors["Cobra"] = []
            for _ in range(reps):
                cobra = Cobra(epsilon=self.aggregate.epsilon)
                X, y = shuffle(self.aggregate.X_, self.aggregate.y_)
                cobra.fit(X, y, default=False)
                cobra.split_data(shuffle_data=True)
                load_machines(cobra)
                cobra.load_machine_predictions()
                score_estimators(errors, "Cobra", X, y, cobra,
                                 self.X_test, self.y_test)
                errors["Cobra"].append(
                    mean_squared_error(self.y_test, cobra.predict(self.X_test)))

        elif aggregate_type is KernelCobra:
            errors = {name: [] for name in self.estimators}
            # was initialised as "KernalCobra" but appended to as "KernelCobra"
            errors["KernelCobra"] = []
            for _ in range(reps):
                kernel = KernelCobra()
                X, y = shuffle(self.aggregate.X_, self.aggregate.y_)
                kernel.fit(X, y, default=False)
                kernel.split_data(shuffle_data=True)
                load_machines(kernel)
                kernel.load_machine_predictions()
                # fitted on kernel's split (previously an undefined `cobra`)
                score_estimators(errors, "Cobra", X, y, kernel,
                                 self.X_test, self.y_test)
                errors["KernelCobra"].append(
                    mean_squared_error(self.y_test, predict(kernel, self.X_test)))

        elif aggregate_type is Ewa:
            # keyed by self.estimators, the same keys that are appended to below
            errors = {name: [] for name in self.estimators}
            errors["EWA"] = []
            for _ in range(reps):
                ewa = Ewa(random_state=self.random_state, beta=self.aggregate.beta)
                X, y = shuffle(self.aggregate.X_, self.aggregate.y_,
                               random_state=self.aggregate.random_state)
                ewa.fit(X, y, default=False)
                ewa.split_data(shuffle_data=True)
                load_machines(ewa)
                ewa.load_machine_weights(self.aggregate.beta)
                X_test, y_test = shuffle(self.X_test, self.y_test,
                                         random_state=self.aggregate.random_state)
                # predictions and targets both come from the shuffled test
                # set, so rows stay aligned
                score_estimators(errors, "EWA", X, y, ewa, X_test, y_test)
                errors["EWA"].append(
                    mean_squared_error(y_test, ewa.predict(X_test)))

        elif aggregate_type is ClassifierCobra:
            # keyed by self.estimators, the same keys that are appended to below
            errors = {name: [] for name in self.estimators}
            errors["ClassifierCobra"] = []
            for _ in range(reps):
                cc = ClassifierCobra(random_state=self.random_state)
                X, y = shuffle(self.aggregate.X_, self.aggregate.y_,
                               random_state=self.aggregate.random_state)
                cc.fit(X, y, default=False)
                cc.split_data(shuffle_data=True)
                load_machines(cc)
                cc.load_machine_predictions()
                X_test, y_test = shuffle(self.X_test, self.y_test,
                                         random_state=self.aggregate.random_state)
                for name in self.estimators:
                    errors[name].append(
                        1 - accuracy_score(y_test, self.estimators[name].predict(X_test)))
                errors["ClassifierCobra"].append(
                    1 - accuracy_score(y_test, cc.predict(X_test)))

        if errors is not None:
            try:
                dataframe = pd.DataFrame(data=errors)
            except ValueError:
                return errors

    # code for different boxplot styles using the python graph gallery tutorial:
    # https://python-graph-gallery.com/39-hidden-data-under-boxplot/
    sns.set(style="whitegrid")
    # Create the sized figure before drawing so the plot lands on it.
    plt.figure(figsize=(self.plot_size, self.plot_size))

    if kind == "normal":
        sns.boxplot(data=dataframe)
        plt.title("Boxplot")
    elif kind == "violin":
        sns.violinplot(data=dataframe)
        plt.title("Violin Plot")
    elif kind == "jitterplot":
        sns.boxplot(data=dataframe)
        sns.stripplot(data=dataframe, color="orange", jitter=0.2, size=2.5)
        plt.title("Boxplot with jitter", loc="left")

    plt.ylabel("Mean Squared Errors")
    plt.xlabel("Estimators")
    plt.show()

    if info:
        return dataframe
# training data-set
X_train = X[:D1 + D2]
X_test = X[D1 + D2 + D3:D1 + D2 + D3 + D4]
X_eps = X[D1 + D2:D1 + D2 + D3]
# for testing
Y_train = Y[:D1 + D2]
Y_test = Y[D1 + D2 + D3:D1 + D2 + D3 + D4]
Y_eps = Y[D1 + D2:D1 + D2 + D3]

######################################################################
# Setting up COBRA
# ~~~~~~~~~~~~~~~~
#
# Let's set up our COBRA machine with the data.
#

cobra = Cobra(random_state=0, epsilon=0.5)
cobra.fit(X_train, Y_train, default=False)

######################################################################
# When we are fitting, we initialise COBRA with an epsilon value of
# :math:`0.5` - this is because we are aware of the distribution and 0.5
# is a fair guess of what would be a "good" epsilon value, because the
# data varies from :math:`-1` to :math:`1`.
#
# If we do not pass the :math:`\epsilon` parameter, we perform a CV on the
# training data for an optimised epsilon.
#
# It can be noticed that the ``default`` parameter is set as false: this
# is so we can walk you through what happens when COBRA is set-up, instead
# of the default settings being used.
#