def setUp(self):
    """Fit Cobra, Ewa and KernelCobra on a seeded synthetic data-set.

    800 samples with 20 features are drawn uniformly from [-1, 1). Each
    aggregate is fitted on the first 400 rows; the last 200 rows are kept
    on the fixture as ``self.test_data``.
    """
    # setting up our random data-set
    rng = np.random.RandomState(42)

    # D1 = train machines; D2 = create COBRA; D3 = calibrate epsilon, alpha; D4 = testing
    n_features = 20
    D1, D2, D3, D4 = 200, 200, 200, 200
    n_samples = D1 + D2 + D3 + D4

    X = rng.uniform(-1, 1, n_samples * n_features).reshape(n_samples, n_features)
    # The response depends on features 1, 3 and 10 only.
    Y = np.power(X[:, 1], 2) + np.power(X[:, 3], 3) + np.exp(X[:, 10])

    # Row boundaries of the training and testing parts.
    train_stop = D1 + D2
    test_start = train_stop + D3
    test_stop = test_start + D4

    # training data-set
    X_train, Y_train = X[:train_stop], Y[:train_stop]
    # for testing (Y_test is sliced but not stored on this fixture)
    X_test, Y_test = X[test_start:test_stop], Y[test_start:test_stop]

    cobra_model = Cobra(random_state=0, epsilon=0.5)
    cobra_model.fit(X_train, Y_train)

    ewa_model = Ewa(random_state=0)
    ewa_model.fit(X_train, Y_train)

    kernel_model = KernelCobra(random_state=0)
    kernel_model.fit(X_train, Y_train)

    self.test_data = X_test
    self.cobra = cobra_model
    self.ewa = ewa_model
    self.kernelcobra = kernel_model
def setUp(self):
    """Fit Cobra and Ewa on synthetic data and wrap each in a Visualisation.

    800 samples with 20 features are drawn uniformly from [-1, 1). Both
    aggregates are fitted on the first 400 rows; the last 200 rows are kept
    as ``self.test_data`` / ``self.test_response`` and the first four of
    those are handed to the ``Visualisation`` objects.
    """
    # setting up our random data-set
    rng = np.random.RandomState(42)

    # D1 = train machines; D2 = create COBRA; D3 = calibrate epsilon, alpha; D4 = testing
    n_features = 20
    D1, D2, D3, D4 = 200, 200, 200, 200
    n_samples = D1 + D2 + D3 + D4

    X = rng.uniform(-1, 1, n_samples * n_features).reshape(n_samples, n_features)
    # The response depends on features 1, 3 and 10 only.
    Y = np.power(X[:, 1], 2) + np.power(X[:, 3], 3) + np.exp(X[:, 10])

    # Row boundaries of the training and testing parts.
    train_stop = D1 + D2
    test_start = train_stop + D3
    test_stop = test_start + D4

    # training data-set
    X_train, Y_train = X[:train_stop], Y[:train_stop]
    # for testing
    X_test, Y_test = X[test_start:test_stop], Y[test_start:test_stop]

    cobra_model = Cobra(random_state=0, epsilon=0.5)
    cobra_model.fit(X_train, Y_train)

    self.test_data = X_test
    self.test_response = Y_test
    self.cobra = cobra_model

    # Only the first four test points are used for the visualisations.
    self.cobra_vis = Visualisation(
        self.cobra, self.test_data[0:4], self.test_response[0:4]
    )
    self.indices, self.mse = self.cobra_vis.indice_info(
        self.test_data[0:4],
        self.test_response[0:4],
        epsilon=self.cobra.epsilon,
    )

    ewa_model = Ewa(random_state=0)
    ewa_model.fit(X_train, Y_train)
    self.ewa = ewa_model
    self.ewa_vis = Visualisation(
        self.ewa, self.test_data[0:4], self.test_response[0:4]
    )
def optimal_beta(self, X, y, betas=None, info=False):
    """
    Search a grid of beta values for the one giving the lowest Ewa MSE.

    For every candidate beta a fresh ``Ewa`` is fitted on the aggregate's
    stored training data (``self.aggregate.X_`` / ``self.aggregate.y_``)
    and its predictions for ``X`` are scored against ``y``.

    Parameters
    ----------
    X: array-like
        Query data passed to ``Ewa.predict``.

    y: array-like
        Target values the predictions are compared with.

    betas: iterable, optional
        Candidate beta values. Defaults to ``np.arange(0.1, 10)``.

    info: bool, optional
        If True, return the full beta -> MSE dictionary instead of the
        best pair.

    Returns
    -------
    MSE: dictionary mapping each beta to its mean squared error
        (only when ``info`` is True)
    (opt, mse): the beta with the lowest mean squared error and that error
        (when ``info`` is False)
    """
    candidates = np.arange(0.1, 10) if betas is None else betas

    def score(beta):
        # A new machine per beta, always trained on the same stored data.
        model = Ewa(random_state=self.random_state, beta=beta)
        model.fit(self.aggregate.X_, self.aggregate.y_)
        return mean_squared_error(y, model.predict(X))

    errors = {beta: score(beta) for beta in candidates}

    if info:
        return errors

    best = min(errors, key=errors.get)
    return best, errors[best]
def boxplot(self, reps=100, info=False):
    """
    Plots boxplots of the errors of the aggregate and of its machines.

    The experiment is repeated ``reps`` times. Each repetition shuffles the
    aggregate's stored training data, fits a fresh aggregate of the same
    type on it, refits every machine on that aggregate's ``X_k_``/``y_k_``
    split and scores machines and aggregate on the shuffled test data.
    Cobra and Ewa are scored with the mean squared error, ClassifierCobra
    with ``1 - accuracy``.

    Parameters
    ----------
    reps: int, optional
        Number of times to repeat experiments for boxplot.

    info: boolean, optional
        If True, return the plotted data: one list of errors per machine,
        followed by the list for the aggregate itself.
    """

    def run_experiments(label, build, finalise, score):
        # One error list per machine, plus one for the aggregate under `label`.
        scores = {name: [] for name in self.aggregate.machines_}
        scores[label] = []

        for _ in range(0, reps):
            fresh = build()
            X, y = shuffle(self.aggregate.X_, self.aggregate.y_,
                           random_state=self.aggregate.random_state)
            fresh.fit(X, y, default=False)
            fresh.split_data(shuffle_data=True)

            # Refit the original machines on the new split and hand them over.
            for name in self.aggregate.machines_:
                self.aggregate.machines_[name].fit(fresh.X_k_, fresh.y_k_)
                fresh.load_machine(name, self.aggregate.machines_[name])
            finalise(fresh)

            X_test, y_test = shuffle(self.X_test, self.y_test,
                                     random_state=self.aggregate.random_state)
            for name in fresh.machines_:
                scores[name].append(
                    score(y_test, fresh.machines_[name].predict(X_test)))
            scores[label].append(score(y_test, fresh.predict(X_test)))

        return list(scores.values()), list(scores)

    def misclassification(truth, predicted):
        return 1 - accuracy_score(truth, predicted)

    if type(self.aggregate) is Cobra:
        data, labels = run_experiments(
            "COBRA",
            lambda: Cobra(random_state=self.random_state,
                          epsilon=self.aggregate.epsilon),
            lambda fresh: fresh.load_machine_predictions(),
            mean_squared_error,
        )

    if type(self.aggregate) is Ewa:
        data, labels = run_experiments(
            "EWA",
            lambda: Ewa(random_state=self.random_state,
                        beta=self.aggregate.beta),
            lambda fresh: fresh.load_machine_weights(self.aggregate.beta),
            mean_squared_error,
        )

    if type(self.aggregate) is ClassifierCobra:
        data, labels = run_experiments(
            "ClassifierCobra",
            lambda: ClassifierCobra(random_state=self.random_state),
            lambda fresh: fresh.load_machine_predictions(),
            misclassification,
        )

    plt.figure(figsize=(self.plot_size, self.plot_size))
    plt.boxplot(data, labels=labels)
    plt.show()

    if info:
        return data
# cobra_vis.QQ() cobra_vis.boxplot() ###################################################################### # Plotting EWA! # ~~~~~~~~~~~~~ # # We can use the same visualisation class for seeing how EWA works. Let's # demonstrate this! # ewa = Ewa() ewa.set_beta(X_beta=X_eps, y_beta=Y_eps) ewa.fit(X_train, Y_train) ewa_vis = Visualisation(ewa, X_test, Y_test) ewa_vis.QQ("EWA") ewa_vis.boxplot() ###################################################################### # Plotting ClassifierCobra # ~~~~~~~~~~~~~~~~~~~~~~~~ #
# from sklearn.utils.estimator_checks import check_estimator # check_estimator(Cobra) #passes ###################################################################### # Exponentially Weighted Average Aggregate # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # Let us also demonstrate the EWA predictor. You can read more about it # over here in the # `paper <http://www.crest.fr/ckfinder/userfiles/files/pageperso/tsybakov/DTcolt2007.pdf>`__ # by A. Dalalyan and A. B. Tsybakov. # ewa = Ewa() ewa.set_beta(X_beta=X_eps, y_beta=Y_eps) ###################################################################### # If we fit EWA without passing beta, we perform a CV to find the optimal # beta. # ewa.fit(X_train, Y_train) # check_estimator(Ewa) #passes ###################################################################### # EWA assigns weights to each machine based on it's MSE. We can check the # weights of each machine with the ``plot_machine_weights`` method.
def boxplot(self, reps=100, info=False, dataframe=None, kind="normal"):
    """
    Plots boxplots of the errors of the aggregate and of the estimators.

    Unless ``dataframe`` is supplied, the experiment is repeated ``reps``
    times: the aggregate's stored training data is shuffled, a fresh
    aggregate of the same type is fitted on it, the estimators are refitted
    and everything is scored on the test data. Cobra, KernelCobra and Ewa
    are scored with the mean squared error, ClassifierCobra with
    ``1 - accuracy``.

    Parameters
    ----------
    reps: int, optional
        Number of times to repeat experiments for boxplot.

    info: boolean, optional
        If True, return the dataframe that was plotted.

    dataframe: pandas.DataFrame, optional
        Pre-computed errors, one column per estimator. When given, no
        experiments are run and this data is plotted directly.

    kind: str, optional
        Plot style: "normal" (boxplot), "violin" or "jitterplot" (boxplot
        with the individual points drawn on top).

    Returns
    -------
    The plotted dataframe when ``info`` is True. If the collected errors
    cannot be turned into a DataFrame (``pd.DataFrame`` raises ValueError),
    the raw dictionary of errors is returned instead and nothing is plotted.
    """
    kwargs = self.kwargs

    def _predict(estimator, X):
        # Only KernelCobra takes a bandwidth; without a user-supplied
        # "bandwidth_kernel" every estimator uses its default predict.
        if type(estimator) is KernelCobra and "bandwidth_kernel" in kwargs:
            return estimator.predict(X, bandwidth=kwargs["bandwidth_kernel"])
        return estimator.predict(X)

    def _refit(name, marker, X, y, fresh):
        # Estimators whose name contains `marker` are fitted on the whole
        # shuffled sample, all others on the fresh aggregate's split.
        if marker in name:
            self.estimators[name].fit(X, y)
        else:
            self.estimators[name].fit(fresh.X_k_, fresh.y_k_)

    if dataframe is None:
        if type(self.aggregate) is Cobra:
            MSE = {name: [] for name in self.estimators}
            MSE["Cobra"] = []
            for _ in range(reps):
                cobra = Cobra(epsilon=self.aggregate.epsilon)
                X, y = shuffle(self.aggregate.X_, self.aggregate.y_)
                cobra.fit(X, y, default=False)
                cobra.split_data(shuffle_data=True)

                for machine in self.aggregate.estimators_:
                    self.aggregate.estimators_[machine].fit(cobra.X_k_, cobra.y_k_)
                    cobra.load_machine(machine, self.aggregate.estimators_[machine])
                cobra.load_machine_predictions()

                for machine in self.estimators:
                    _refit(machine, "Cobra", X, y, cobra)
                    preds = _predict(self.estimators[machine], self.X_test)
                    MSE[machine].append(mean_squared_error(self.y_test, preds))
                MSE["Cobra"].append(
                    mean_squared_error(self.y_test, cobra.predict(self.X_test)))

            try:
                dataframe = pd.DataFrame(data=MSE)
            except ValueError:
                return MSE

        if type(self.aggregate) is KernelCobra:
            MSE = {name: [] for name in self.estimators}
            # The key must match the one appended to below.
            MSE["KernelCobra"] = []
            for _ in range(reps):
                kernel = KernelCobra()
                X, y = shuffle(self.aggregate.X_, self.aggregate.y_)
                kernel.fit(X, y, default=False)
                kernel.split_data(shuffle_data=True)

                for machine in self.aggregate.estimators_:
                    self.aggregate.estimators_[machine].fit(kernel.X_k_, kernel.y_k_)
                    kernel.load_machine(machine, self.aggregate.estimators_[machine])
                kernel.load_machine_predictions()

                for machine in self.estimators:
                    # Use this branch's own aggregate for the split.
                    _refit(machine, "Cobra", X, y, kernel)
                    preds = _predict(self.estimators[machine], self.X_test)
                    MSE[machine].append(mean_squared_error(self.y_test, preds))
                MSE["KernelCobra"].append(
                    mean_squared_error(self.y_test, _predict(kernel, self.X_test)))

            try:
                dataframe = pd.DataFrame(data=MSE)
            except ValueError:
                return MSE

        if type(self.aggregate) is Ewa:
            # NOTE(review): the error lists are keyed by
            # self.aggregate.estimators_ but the loops below iterate
            # self.estimators; this assumes both hold the same names.
            MSE = {name: [] for name in self.aggregate.estimators_}
            MSE["EWA"] = []
            for _ in range(reps):
                ewa = Ewa(random_state=self.random_state, beta=self.aggregate.beta)
                X, y = shuffle(self.aggregate.X_, self.aggregate.y_,
                               random_state=self.aggregate.random_state)
                ewa.fit(X, y, default=False)
                ewa.split_data(shuffle_data=True)

                for machine in self.estimators:
                    self.aggregate.estimators_[machine].fit(ewa.X_k_, ewa.y_k_)
                    ewa.load_machine(machine, self.aggregate.estimators_[machine])
                ewa.load_machine_weights(self.aggregate.beta)

                X_test, y_test = shuffle(self.X_test, self.y_test,
                                         random_state=self.aggregate.random_state)
                for machine in self.estimators:
                    _refit(machine, "EWA", X, y, ewa)
                    # Predict on the shuffled X_test so the rows line up
                    # with the shuffled y_test they are scored against.
                    preds = _predict(self.estimators[machine], X_test)
                    MSE[machine].append(mean_squared_error(y_test, preds))
                MSE["EWA"].append(mean_squared_error(y_test, ewa.predict(X_test)))

            try:
                dataframe = pd.DataFrame(data=MSE)
            except ValueError:
                return MSE

        if type(self.aggregate) is ClassifierCobra:
            # NOTE(review): keyed by self.aggregate.estimators_ but scored
            # over self.estimators; assumes both hold the same names.
            errors = {name: [] for name in self.aggregate.estimators_}
            errors["ClassifierCobra"] = []
            for _ in range(reps):
                cc = ClassifierCobra(random_state=self.random_state)
                X, y = shuffle(self.aggregate.X_, self.aggregate.y_,
                               random_state=self.aggregate.random_state)
                cc.fit(X, y, default=False)
                cc.split_data(shuffle_data=True)

                for machine in self.aggregate.estimators_:
                    self.aggregate.estimators_[machine].fit(cc.X_k_, cc.y_k_)
                    cc.load_machine(machine, self.aggregate.estimators_[machine])
                cc.load_machine_predictions()

                X_test, y_test = shuffle(self.X_test, self.y_test,
                                         random_state=self.aggregate.random_state)
                for machine in self.estimators:
                    errors[machine].append(
                        1 - accuracy_score(y_test, self.estimators[machine].predict(X_test)))
                errors["ClassifierCobra"].append(
                    1 - accuracy_score(y_test, cc.predict(X_test)))

            try:
                dataframe = pd.DataFrame(data=errors)
            except ValueError:
                return errors

    # code for different boxplot styles using the python graph gallery tutorial:
    # https://python-graph-gallery.com/39-hidden-data-under-boxplot/
    sns.set(style="whitegrid")

    # Create the figure before drawing. Creating it afterwards would leave
    # the plot on a default-sized figure and show an extra empty one.
    plt.figure(figsize=(self.plot_size, self.plot_size))

    if kind == "normal":
        sns.boxplot(data=dataframe)
        plt.title("Boxplot")
    elif kind == "violin":
        sns.violinplot(data=dataframe)
        plt.title("Violin Plot")
    elif kind == "jitterplot":
        sns.boxplot(data=dataframe)
        sns.stripplot(data=dataframe, color="orange", jitter=0.2, size=2.5)
        plt.title("Boxplot with jitter", loc="left")

    plt.ylabel("Mean Squared Errors")
    plt.xlabel("Estimators")
    plt.show()

    if info:
        return dataframe