Exemplo n.º 1
0
    def setUp(self):
        """Build a synthetic regression data-set and fit Cobra, Ewa and KernelCobra on it."""
        # seeded generator so the fixture is reproducible
        rng = np.random.RandomState(42)

        # the samples are cut into four chunks of 200 rows:
        # D1 = train machines; D2 = create COBRA; D3 = calibrate epsilon, alpha; D4 = testing
        n_features = 20
        D1, D2, D3, D4 = 200, 200, 200, 200
        n_samples = D1 + D2 + D3 + D4
        X = rng.uniform(-1, 1, n_samples * n_features).reshape(n_samples, n_features)
        # the response only depends on columns 1, 3 and 10
        Y = np.power(X[:, 1], 2) + np.power(X[:, 3], 3) + np.exp(X[:, 10])

        # row boundaries of the training (D1 + D2) and testing (D4) chunks
        train_stop = D1 + D2
        test_start = train_stop + D3
        test_stop = test_start + D4

        X_train, Y_train = X[:train_stop], Y[:train_stop]
        X_test = X[test_start:test_stop]
        # not stored by this fixture; computed alongside X_test
        Y_test = Y[test_start:test_stop]

        # fit the three aggregates on the same training chunk
        self.cobra = Cobra(random_state=0, epsilon=0.5)
        self.cobra.fit(X_train, Y_train)

        self.ewa = Ewa(random_state=0)
        self.ewa.fit(X_train, Y_train)

        self.kernelcobra = KernelCobra(random_state=0)
        self.kernelcobra.fit(X_train, Y_train)

        self.test_data = X_test
Exemplo n.º 2
0
    def setUp(self):
        """Fit Cobra and Ewa on synthetic data and wrap each in a Visualisation object."""
        # seeded generator so the fixture is reproducible
        rng = np.random.RandomState(42)

        # the samples are cut into four chunks of 200 rows:
        # D1 = train machines; D2 = create COBRA; D3 = calibrate epsilon, alpha; D4 = testing
        n_features = 20
        D1, D2, D3, D4 = 200, 200, 200, 200
        n_samples = D1 + D2 + D3 + D4
        X = rng.uniform(-1, 1, n_samples * n_features).reshape(n_samples, n_features)
        # the response only depends on columns 1, 3 and 10
        Y = np.power(X[:, 1], 2) + np.power(X[:, 3], 3) + np.exp(X[:, 10])

        # row boundaries of the training (D1 + D2) and testing (D4) chunks
        train_stop = D1 + D2
        test_start = train_stop + D3
        test_stop = test_start + D4

        X_train, Y_train = X[:train_stop], Y[:train_stop]
        self.test_data = X[test_start:test_stop]
        self.test_response = Y[test_start:test_stop]

        # COBRA aggregate plus its visualisation over the first four test rows
        self.cobra = Cobra(random_state=0, epsilon=0.5)
        self.cobra.fit(X_train, Y_train)
        self.cobra_vis = Visualisation(
            self.cobra, self.test_data[0:4], self.test_response[0:4]
        )
        self.indices, self.mse = self.cobra_vis.indice_info(
            self.test_data[0:4], self.test_response[0:4], epsilon=self.cobra.epsilon
        )

        # EWA aggregate plus its visualisation over the same four rows
        self.ewa = Ewa(random_state=0)
        self.ewa.fit(X_train, Y_train)
        self.ewa_vis = Visualisation(
            self.ewa, self.test_data[0:4], self.test_response[0:4]
        )
Exemplo n.º 3
0
    def optimal_beta(self, X, y, betas=None, info=False):
        """
        Search a set of beta values for the one giving the lowest Ewa error.

        For every candidate beta a new Ewa machine is fitted on the
        aggregate's stored training data (``X_``, ``y_``) and scored on
        ``(X, y)`` with the mean squared error.

        Parameters
        ----------

        X: array-like
            Query data handed to ``predict`` of each candidate machine.

        y: array-like
            Target values the predictions for ``X`` are compared against.

        betas: iterable, optional
            Candidate beta values. Defaults to ``np.arange(0.1, 10)``.

        info: bool, optional
            If True, return the error of every beta instead of only the best.

        Returns
        -------

        MSE: dictionary mapping each beta to its mean squared error
            (returned alone when ``info`` is True)
        (opt, error): optimal beta value and its mean squared error
            (returned when ``info`` is False)

        """
        candidates = np.arange(0.1, 10) if betas is None else betas

        errors = {}
        for candidate in candidates:
            ewa = Ewa(random_state=self.random_state, beta=candidate)
            ewa.fit(self.aggregate.X_, self.aggregate.y_)
            errors[candidate] = mean_squared_error(y, ewa.predict(X))

        if not info:
            best = min(errors, key=errors.get)
            return best, errors[best]
        return errors
Exemplo n.º 4
0
    def boxplot(self, reps=100, info=False):
        """
        Plots boxplots of the errors of each machine and of the aggregate.

        The experiment is repeated ``reps`` times: a new aggregate of the same
        type as ``self.aggregate`` is fitted on the shuffled training data, the
        machines of ``self.aggregate`` are re-fitted on its ``X_k_``/``y_k_``
        split and loaded into it, and every machine as well as the new
        aggregate is scored on the shuffled test data. Note that the machines
        stored in ``self.aggregate`` are re-fitted in place.

        Regression aggregates (Cobra, Ewa) are scored with the mean squared
        error, ClassifierCobra with ``1 - accuracy``.

        Parameters
        ----------
        reps: int, optional
            Number of times to repeat experiments for boxplot.

        info: boolean, optional
            If True, return the plotted data: one list of errors per machine,
            followed by the list for the aggregate.

        Raises
        ------
        TypeError
            If ``self.aggregate`` is not exactly a Cobra, Ewa or
            ClassifierCobra instance.

        """
        aggregate = self.aggregate
        aggregate_type = type(aggregate)

        # Resolve the label and error measure once, and reject unsupported
        # aggregates before any work is done or any figure is opened.
        if aggregate_type is Cobra:
            label = "COBRA"
            score = mean_squared_error
        elif aggregate_type is Ewa:
            label = "EWA"
            score = mean_squared_error
        elif aggregate_type is ClassifierCobra:
            label = "ClassifierCobra"

            def score(y_true, y_pred):
                return 1 - accuracy_score(y_true, y_pred)
        else:
            raise TypeError(
                "boxplot supports Cobra, Ewa and ClassifierCobra aggregates, got "
                + aggregate_type.__name__
            )

        errors = {name: [] for name in aggregate.machines_}
        errors[label] = []

        for _ in range(reps):
            # fresh aggregate configured like the original one
            if aggregate_type is Cobra:
                fresh = Cobra(random_state=self.random_state, epsilon=aggregate.epsilon)
            elif aggregate_type is Ewa:
                fresh = Ewa(random_state=self.random_state, beta=aggregate.beta)
            else:
                fresh = ClassifierCobra(random_state=self.random_state)

            X, y = shuffle(aggregate.X_, aggregate.y_, random_state=aggregate.random_state)
            # default=False: the machines are loaded by hand below
            fresh.fit(X, y, default=False)
            fresh.split_data(shuffle_data=True)

            for name, machine in aggregate.machines_.items():
                machine.fit(fresh.X_k_, fresh.y_k_)
                fresh.load_machine(name, machine)

            # Ewa combines its machines through weights, the COBRA variants
            # through stored machine predictions
            if aggregate_type is Ewa:
                fresh.load_machine_weights(aggregate.beta)
            else:
                fresh.load_machine_predictions()

            X_test, y_test = shuffle(self.X_test, self.y_test, random_state=aggregate.random_state)
            for name, machine in fresh.machines_.items():
                errors[name].append(score(y_test, machine.predict(X_test)))
            errors[label].append(score(y_test, fresh.predict(X_test)))

        labels = list(errors)
        data = [errors[name] for name in labels]

        plt.figure(figsize=(self.plot_size, self.plot_size))
        plt.boxplot(data, labels=labels)
        plt.show()

        if info:
            return data
Exemplo n.º 5
0
# 

# NOTE(review): QQ is presumably a quantile-quantile style plot of predictions
# against the test responses — confirm against Visualisation.QQ.
cobra_vis.QQ()

# Box plot of the errors of the machines and of the aggregate; called with its
# default arguments.
cobra_vis.boxplot()


######################################################################
# Plotting EWA!
# ~~~~~~~~~~~~~
#
# We can use the same visualisation class for seeing how EWA works. Let's
# demonstrate this!
#

# X_eps / Y_eps and the train / test splits are defined earlier in this script.
# NOTE(review): set_beta presumably selects beta using the (X_eps, Y_eps)
# data — confirm against Ewa.set_beta.
ewa = Ewa()
ewa.set_beta(X_beta=X_eps, y_beta=Y_eps)
ewa.fit(X_train, Y_train)

# Same Visualisation wrapper as used for COBRA above, now around the EWA
# aggregate and the test split.
ewa_vis = Visualisation(ewa, X_test, Y_test)

# NOTE(review): the "EWA" argument looks like the name of the series to plot —
# confirm against Visualisation.QQ.
ewa_vis.QQ("EWA")

ewa_vis.boxplot()


######################################################################
# Plotting ClassifierCobra
# ~~~~~~~~~~~~~~~~~~~~~~~~
# 
Exemplo n.º 6
0
#

from sklearn.utils.estimator_checks import check_estimator
# check_estimator(Cobra) #passes

######################################################################
# Exponentially Weighted Average Aggregate
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Let us also demonstrate the EWA predictor. You can read more about it
# over here in the
# `paper <http://www.crest.fr/ckfinder/userfiles/files/pageperso/tsybakov/DTcolt2007.pdf>`__
# by A. Dalalyan and A. B. Tsybakov.
#

# Ewa is constructed without arguments, so no beta is passed here.
ewa = Ewa()

# X_eps / Y_eps are defined earlier in this script.
# NOTE(review): set_beta presumably selects beta using the (X_eps, Y_eps)
# data — confirm against Ewa.set_beta.
ewa.set_beta(X_beta=X_eps, y_beta=Y_eps)

######################################################################
# If we fit EWA without passing beta, we perform a CV to find the optimal
# beta.
#

ewa.fit(X_train, Y_train)

# check_estimator(Ewa) #passes

######################################################################
# EWA assigns weights to each machine based on its MSE. We can check the
# weights of each machine with the ``plot_machine_weights`` method.
Exemplo n.º 7
0
    def boxplot(self, reps=100, info=False, dataframe=None, kind="normal"):
        """
        Plots boxplots of the errors of the estimators and of the aggregate.

        Unless ``dataframe`` is given, the experiment is repeated ``reps``
        times: a new aggregate of the same type as ``self.aggregate`` is fitted
        on the shuffled training data, the machines of ``self.aggregate`` are
        re-fitted and loaded into it, and every entry of ``self.estimators``
        as well as the new aggregate is scored on the test data. Estimators
        are re-fitted in place.

        Parameters
        ----------
        reps: int, optional
            Number of times to repeat experiments for boxplot.

        info: boolean, optional
            If True, return the plotted data frame.

        dataframe: pandas.DataFrame, optional
            Pre-computed errors to plot (one column per estimator). When given,
            no experiments are run.

        kind: string, optional
            Plot style: "normal" (box plot), "violin" (violin plot) or
            "jitterplot" (box plot with the individual points overlaid).
            Any other value draws nothing.

        Returns
        -------
        The data frame when ``info`` is True. If the collected errors cannot be
        turned into a data frame (pandas raises ValueError), the raw dictionary
        of error lists is returned instead and nothing is plotted.

        Raises
        ------
        TypeError
            If no ``dataframe`` is given and ``self.aggregate`` is not exactly
            a Cobra, KernelCobra, Ewa or ClassifierCobra instance.

        """
        if dataframe is None:
            aggregate_type = type(self.aggregate)
            if aggregate_type is Cobra or aggregate_type is KernelCobra:
                errors = self._boxplot_cobra_errors(reps)
            elif aggregate_type is Ewa:
                errors = self._boxplot_ewa_errors(reps)
            elif aggregate_type is ClassifierCobra:
                errors = self._boxplot_classifier_errors(reps)
            else:
                raise TypeError(
                    "boxplot supports Cobra, KernelCobra, Ewa and ClassifierCobra "
                    "aggregates, got " + aggregate_type.__name__
                )

            try:
                dataframe = pd.DataFrame(data=errors)
            except ValueError:
                # columns of unequal length: hand back the raw lists instead
                return errors

        # code for different boxplot styles using the python graph gallery tutorial:
        # https://python-graph-gallery.com/39-hidden-data-under-boxplot/

        sns.set(style="whitegrid")

        # The figure has to exist before drawing; creating it afterwards opens
        # a second, empty figure and leaves the plot at the default size.
        plt.figure(figsize=(self.plot_size, self.plot_size))

        if kind == "normal":
            sns.boxplot(data=dataframe)
            plt.title("Boxplot")

        if kind == "violin":
            sns.violinplot(data=dataframe)
            plt.title("Violin Plot")

        if kind == "jitterplot":
            sns.boxplot(data=dataframe)
            sns.stripplot(data=dataframe, color="orange", jitter=0.2, size=2.5)
            plt.title("Boxplot with jitter", loc="left")

        plt.ylabel("Mean Squared Errors")
        plt.xlabel("Estimators")
        plt.show()

        if info:
            return dataframe

    def _boxplot_predict(self, estimator, X):
        """Predict with ``estimator``, passing the configured kernel bandwidth to KernelCobra."""
        kwargs = self.kwargs
        # Only KernelCobra takes a bandwidth; without a configured value its
        # own default is used.
        if type(estimator) is KernelCobra and "bandwidth_kernel" in kwargs:
            return estimator.predict(X, bandwidth=kwargs["bandwidth_kernel"])
        return estimator.predict(X)

    def _boxplot_cobra_errors(self, reps):
        """Collect per-repetition MSEs for a Cobra or KernelCobra aggregate."""
        is_kernel = type(self.aggregate) is KernelCobra
        label = "KernelCobra" if is_kernel else "Cobra"

        errors = {name: [] for name in self.estimators}
        errors[label] = []

        for _ in range(reps):
            if is_kernel:
                fresh = KernelCobra()
            else:
                fresh = Cobra(epsilon=self.aggregate.epsilon)
            X, y = shuffle(self.aggregate.X_, self.aggregate.y_)
            # default=False: the machines are loaded by hand below
            fresh.fit(X, y, default=False)
            fresh.split_data(shuffle_data=True)

            for name, machine in self.aggregate.estimators_.items():
                machine.fit(fresh.X_k_, fresh.y_k_)
                fresh.load_machine(name, machine)

            fresh.load_machine_predictions()

            for name, estimator in self.estimators.items():
                # entries named like an aggregate get the full shuffled data,
                # plain machines only the machine-training split
                if "Cobra" in name:
                    estimator.fit(X, y)
                else:
                    estimator.fit(fresh.X_k_, fresh.y_k_)
                preds = self._boxplot_predict(estimator, self.X_test)
                errors[name].append(mean_squared_error(self.y_test, preds))

            aggregate_preds = self._boxplot_predict(fresh, self.X_test)
            errors[label].append(mean_squared_error(self.y_test, aggregate_preds))

        return errors

    def _boxplot_ewa_errors(self, reps):
        """Collect per-repetition MSEs for an Ewa aggregate."""
        errors = {name: [] for name in self.aggregate.estimators_}
        errors["EWA"] = []
        # estimators that are not part of the aggregate get a column as well
        for name in self.estimators:
            errors.setdefault(name, [])

        for _ in range(reps):
            fresh = Ewa(random_state=self.random_state, beta=self.aggregate.beta)
            X, y = shuffle(self.aggregate.X_, self.aggregate.y_, random_state=self.aggregate.random_state)
            # default=False: the machines are loaded by hand below
            fresh.fit(X, y, default=False)
            fresh.split_data(shuffle_data=True)

            for name, machine in self.aggregate.estimators_.items():
                machine.fit(fresh.X_k_, fresh.y_k_)
                fresh.load_machine(name, machine)

            fresh.load_machine_weights(self.aggregate.beta)
            X_test, y_test = shuffle(self.X_test, self.y_test, random_state=self.aggregate.random_state)

            for name, estimator in self.estimators.items():
                if "EWA" in name:
                    estimator.fit(X, y)
                else:
                    estimator.fit(fresh.X_k_, fresh.y_k_)
                # predict on the shuffled X_test so rows line up with the
                # shuffled y_test they are scored against
                preds = self._boxplot_predict(estimator, X_test)
                errors[name].append(mean_squared_error(y_test, preds))

            errors["EWA"].append(mean_squared_error(y_test, fresh.predict(X_test)))

        return errors

    def _boxplot_classifier_errors(self, reps):
        """Collect per-repetition misclassification rates for a ClassifierCobra aggregate."""
        errors = {name: [] for name in self.aggregate.estimators_}
        errors["ClassifierCobra"] = []
        # estimators that are not part of the aggregate get a column as well
        for name in self.estimators:
            errors.setdefault(name, [])

        for _ in range(reps):
            fresh = ClassifierCobra(random_state=self.random_state)
            X, y = shuffle(self.aggregate.X_, self.aggregate.y_, random_state=self.aggregate.random_state)
            # default=False: the machines are loaded by hand below
            fresh.fit(X, y, default=False)
            fresh.split_data(shuffle_data=True)

            for name, machine in self.aggregate.estimators_.items():
                machine.fit(fresh.X_k_, fresh.y_k_)
                fresh.load_machine(name, machine)

            fresh.load_machine_predictions()
            X_test, y_test = shuffle(self.X_test, self.y_test, random_state=self.aggregate.random_state)

            for name, estimator in self.estimators.items():
                errors[name].append(1 - accuracy_score(y_test, estimator.predict(X_test)))
            errors["ClassifierCobra"].append(1 - accuracy_score(y_test, fresh.predict(X_test)))

        return errors