Example #1
    def pick(self):
        """
        Pick the feature location for the next observation to be taken.

        This uses the recursive Delaunay subdivision algorithm.

        Returns
        -------
        numpy.ndarray
            Location in the parameter space for the next observation to be
            taken
        str
            A random hexadecimal ID to identify the corresponding job
        """
        n = len(self.X)

        # -- note that we are assuming the points in X are not reordered by
        # the scipy Delaunay implementation

        n_corners = 2 ** self.dims
        if n < n_corners + 1:

            # Bootstrap with a regular sampling strategy to get it started
            xq = grid_sample(self.lower, self.upper, n)
            yq_exp = [0.]
        else:

            X = self.X()  # calling returns the value as an array
            y = self.y()
            virtual = self.virtual_flag()

            # Otherwise, recursively subdivide the edges with the Delaunay model
            if not self.triangulation:
                self.triangulation = ScipyDelaunay(X, incremental=True)

            # Weight by hyper-volume
            simplices = [tuple(s) for s in self.triangulation.vertices]
            cache = self.simplex_cache

            def get_value(s):

                # Computes the sample value as:
                #   hyper-volume of simplex * variance of values in simplex
                ind = list(s)
                value = (np.var(y[ind]) + self.explore_priority) * \
                    np.linalg.det((X[ind] - X[ind[0]])[1:])
                if not np.max(virtual[ind]):
                    cache[s] = value
                return value

            # Mostly the simplices won't change from call to call - cache!
            sample_value = [cache[s] if s in cache else get_value(s)
                            for s in simplices]

            # alternatively, a nicely vectorised computation might work here
            # profile and check what the bottleneck is

            # Extract the points in the highest value simplex
            simplex_indices = list(simplices[np.argmax(sample_value)])
            simplex = X[simplex_indices]
            simplex_v = y[simplex_indices]

            # Weight the position in this simplex based on value deviation
            eps = 1e-3
            weight = eps + np.abs(simplex_v - np.mean(simplex_v))
            weight /= np.sum(weight)
            xq = np.sum(weight * simplex, axis=0)  # dot
            yq_exp = np.sum(weight * simplex_v, axis=0)

            self.triangulation.add_points(xq[np.newaxis, :])  # incremental

        uid = Sampler._assign(self, xq, yq_exp)
        return xq, uid
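
For reference, the selection step above boils down to two small computations: each Delaunay simplex is scored by the variance of its observed values (plus an exploration bonus) times a determinant proportional to its hyper-volume, and the winning simplex is then queried at a centroid weighted by how far each vertex's value deviates from the simplex mean. A minimal standalone sketch of both computations, using made-up vertex coordinates, values and exploration priority (illustration data only, not output of the sampler):

    import numpy as np

    # Made-up 2-D simplex (three vertices) with an observed value at each
    # vertex; these numbers are illustration data only.
    simplex = np.array([[0.0, 0.0],
                        [1.0, 0.0],
                        [0.0, 1.0]])
    simplex_v = np.array([0.2, 0.9, 0.3])
    explore_priority = 0.01  # assumed value, just for the sketch

    # Simplex score, as in get_value(): variance of the observed values
    # (plus the exploration bonus) scaled by the determinant of the edge
    # matrix, which is proportional to the simplex hyper-volume.
    edges = (simplex - simplex[0])[1:]
    value = (np.var(simplex_v) + explore_priority) * np.linalg.det(edges)

    # Query point, as in pick(): a centroid weighted by each vertex's
    # absolute deviation from the simplex mean, with a small epsilon so
    # no weight is exactly zero, normalised to sum to one.
    eps = 1e-3
    weight = eps + np.abs(simplex_v - np.mean(simplex_v))
    weight /= np.sum(weight)
    xq = weight @ simplex        # weighted centroid: next query location
    yq_exp = weight @ simplex_v  # expected value at that location

    print(value, xq, yq_exp)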
Example #2
    def pick(self, n_test=500):
        """
        Pick the feature location for the next observation to be taken.

        .. note :: [Properties Modified]
                    X,
                    y,
                    virtual_flag,
                    pending_results,
                    y_mean,
                    hyperparameters,
                    regressors

        Parameters
        ----------
        n_test : int, optional
            The number of random query points across the search space to pick
            from

        Returns
        -------
        numpy.ndarray
            Location in the parameter space for the next observation to be
            taken
        str
            A random hexadecimal ID to identify the corresponding job
        """
        n = len(self.X)

        self.update_y_mean()

        # If we do not have enough samples yet, randomly sample for more!
        if n < self.n_min:

            xq = random_sample(self.lower, self.upper, 1)[0]
            yq_exp = self.y_mean  # Note: Can be 'None' initially

        else:

            if self.regressors is None:
                self.train()

            # Randomly sample the volume for test points
            Xq = random_sample(self.lower, self.upper, n_test)

            # Generate cached predictors for those test points
            predictors = [gp.query(r, Xq) for r in self.regressors]

            # Compute the posterior distributions at those points
            # Note: No covariance information implemented at this stage
            Yq_exp = np.asarray([gp.mean(p) for p in predictors]).T + \
                self.y_mean
            Yq_var = np.asarray([gp.variance(p) for p in predictors]).T

            # Acquisition functions
            acq_defs_current = acq_defs(y_mean=self.y_mean,
                                        explore_priority=self.explore_priority)

            # Compute the acquisition levels at those test points
            yq_acq = acq_defs_current[self.acq_name](Yq_exp, Yq_var)

            # Find the test point with the highest acquisition level
            iq_acq = np.argmax(yq_acq)
            xq = Xq[iq_acq, :]
            yq_exp = Yq_exp[iq_acq, :]

        # Place a virtual observation...
        uid = Sampler._assign(self, xq, yq_exp)  # it can be None...

        return xq, uid
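
This variant follows the usual candidate-set acquisition-maximisation pattern: draw random test points across the search volume, predict a posterior mean and variance at each with the trained regressors, score the candidates with an acquisition function, and query the highest-scoring point. A minimal self-contained sketch of that pattern using plain NumPy stand-ins; the toy surrogate and the upper-confidence-bound rule below are assumptions for illustration, not the sampler's actual gp/acq_defs machinery:

    import numpy as np

    rng = np.random.default_rng(0)
    lower, upper = np.array([0.0, 0.0]), np.array([1.0, 1.0])

    def surrogate(Xq):
        # Stand-in for a trained regressor: returns a toy posterior mean
        # and variance at each test point (purely illustrative, not a GP).
        mean = np.sin(3 * Xq[:, 0]) + np.cos(2 * Xq[:, 1])
        var = 0.1 + 0.05 * np.linalg.norm(Xq - 0.5, axis=1)
        return mean, var

    # Randomly sample the search volume for test points
    n_test = 500
    Xq = lower + (upper - lower) * rng.random((n_test, lower.size))

    # Score each candidate with an upper-confidence-bound style acquisition
    mean, var = surrogate(Xq)
    explore_priority = 1.0  # assumed trade-off constant for the sketch
    yq_acq = mean + explore_priority * np.sqrt(var)

    # Query the candidate with the highest acquisition level
    iq_acq = np.argmax(yq_acq)
    xq, yq_exp = Xq[iq_acq], mean[iq_acq]
    print(xq, yq_exp)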