Example #1
    def optimize_log_lik(self):
        """
       Optimize the kernel hyperparameters before acuiring the next point
        """

        # initial hyperparams
        lengthscales = self.initial_hyperparams['length scales']
        noise_var = self.initial_hyperparams['noise variance']
        amp = self.initial_hyperparams['covar amplitude']

        # optimize hyperparams using scikit-learn, starting from the initial hyperparams
        sk_loklik0, sk_amp0, sk_noise0, sk_ls0 = self.sk_kernel(
            amp, noise_var, lengthscales)

        # default packing for the online GP code, used when this is the first step
        hyperparams_opt = ((np.diag(1. / (sk_ls0**2))), np.log(sk_amp0),
                           np.log(sk_noise0))

        if self.X_obs.shape[0] > 1:  # don't run twice on the first step
            # last hyperparams seen so far
            current_covar = np.sqrt(
                1. / np.exp(self.model.covar_params[0])
            )  # this gives a full matrix, not just length scales; the
            # scikit-learn kernel can't handle a full matrix,
            current_lengthscales = np.diag(
                current_covar)  # so take only the diagonal part - the length scales
            current_covar_amp = np.exp(self.model.covar_params[1])
            current_noise_variance = self.model.noise_var

            # repeat the scikit-learn optimization from the current hyperparams
            print('optimize on last hyperparams seen so far')
            sk_loklik, sk_amp, sk_noise, sk_ls = self.sk_kernel(
                current_covar_amp, current_noise_variance,
                current_lengthscales)

            # compare likelihoods and choose the best hyperparams
            if sk_loklik > sk_loklik0:
                hyperparams_opt = (
                    (np.diag(1. / (sk_ls**2))), np.log(sk_amp),
                    np.log(sk_noise)
                )  #this is the required packing for the online gp code
            else:
                hyperparams_opt = ((np.diag(1. / (sk_ls0**2))),
                                   np.log(sk_amp0), np.log(sk_noise0))
            #print(hyperparams_opt)

        # create new OnlineGP model - overwrites the existing one
        print('sanity dim check: ', self.model.nin == self.X_obs.shape[1])
        self.model = OGP(
            self.model.nin,
            hyperparams=hyperparams_opt,
            maxBV=self.model.maxBV,
            covar=self.model.covar
        )  #, weighted=self.model.weighted,maxBV=self.model.maxBV, prmean=self.model.prmean, prmeanp=self.model.prmeanp, prvar=self.model.prvar, prvarp=self.model.prvarp,proj=self.model.proj,thresh=self.model.thresh, sparsityQ=self.model.sparsityQ)

        # initialize model on current data
        p_X = self.X_obs
        p_Y = self.ytrain
        num = p_X.shape[0]
        self.model.fit(p_X, p_Y, num)
    def optimize_kernel_hyperparameters(self, noiseQ=False):
        """
       Optimize the kernel hyperparameters before acuiring the next point.
       This method optimizes the kernel twice - starting from the initial or last hyperparamters.
       Then compares the log likelihood and re-build the GP model using the most likely hypers.
       Note:  sk learn can't deal with matrix. so we can only optimize on lengthscales.
        """
        self.noiseQ = noiseQ
        # optimize kernel using SK learn from initial hyperparams
        print('optimize on initial hyperparams')
        sk_loklik0, sk_hypers0 = self.sk_kernel(self.initial_hyperparams)

        # optimize kernel using SK learn from current hyperparams
        self.current_hyperparams = {}
        self.current_hyperparams['precisionMatrix'] = np.diag(
            1. / self.model.lengthscales**2)
        self.current_hyperparams['noise_variance'] = self.model.noise_var
        self.current_hyperparams[
            'amplitude_covar'] = self.model.amplitude_covar

        print('optimize on last hyperparams seen so far')
        sk_loklik, sk_hypers = self.sk_kernel(self.current_hyperparams)

        # compare likelihoods and choose best hyperparams
        if sk_loklik > sk_loklik0:
            hyperparams_opt = sk_hypers
        else:
            hyperparams_opt = sk_hypers0

        for key in hyperparams_opt:
            if key == 'precisionMatrix':
                self.hyperparams_opt_all[key] = np.array(
                    list(
                        chain(self.hyperparams_opt_all[key],
                              [hyperparams_opt[key].diagonal()])))
            else:
                self.hyperparams_opt_all[key] = list(
                    chain(self.hyperparams_opt_all[key],
                          [hyperparams_opt[key]]))

        if self.verboseQ: print('hyperparams_opt ', hyperparams_opt)

        # create new OnlineGP model - overwrites the existing one
        if self.verboseQ:
            print('sanity dim check: ', self.model.dim == self.X_obs.shape[1])

        self.model = OGP(self.model.dim,
                         hyperparams=hyperparams_opt,
                         maxBV=self.model.maxBV,
                         covar=self.model.covar)
        #         ,weighted=self.model.weighted, maxBV=self.model.maxBV) #, prmean=self.model.prmean, prmeanp=self.model.prmeanp, prvar=self.model.prvar, prvarp=self.model.prvarp , proj=self.model.proj,thresh=self.model.thresh, sparsityQ=self.model.sparsityQ, verboseQ=self.model.verboseQ)

        # initialize new model on current data
        self.model.fit(self.X_obs,
                       np.array(self.Y_obs).flatten(), self.X_obs.shape[0])
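
# Illustration (not part of the original code): the hyperparameter packing used
# throughout these examples. OGP takes a dict with a diagonal precision matrix,
# a covariance amplitude, and a noise variance; the length scales can be recovered
# from the diagonal of the precision matrix, which is also how BayesOpt.sk_kernel
# (shown later) unpacks it. The values below are placeholders.
import numpy as np

example_lengthscales = np.array([0.3, 0.5])
example_hyperparams = {
    'precisionMatrix': np.diag(1. / example_lengthscales**2),
    'amplitude_covar': 0.25,
    'noise_variance': 1e-4,
}
# round trip: recover the length scales from the precision matrix
assert np.allclose(np.diag(example_hyperparams['precisionMatrix'])**-0.5,
                   example_lengthscales)
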
def FocusCorrection(lens, obj):

    ndim = 1
    dev_ids =  [str(x+1) for x in np.arange(ndim)]

    start_point = [[obj]]  
    mi_module = importlib.import_module('machine_interfaces.machine_interface_Defocus')
    mi = mi_module.machine_interface(dev_ids = dev_ids, start_point = start_point, lens = lens)
    mi.getState()
    
    gp_ls = np.array(np.ones(ndim)) * [0.317] 
    gp_amp = 0.256
    gp_noise = 0.000253
    gp_precisionmat =  np.array(np.diag(1/(gp_ls**2)))
    hyperparams = {'precisionMatrix': gp_precisionmat, 'amplitude_covar': gp_amp, 'noise_variance': gp_noise} 
    gp = OGP(ndim, hyperparams)
    
    opt = BayesOpt(gp, mi, acq_func="UCB", start_dev_vals = mi.x, dev_ids = dev_ids)
    opt.ucb_params = np.array([2, None])
    
    Obj_state_s=[]  # initialize empty Obj_state_s for each start point
    Niter = 10  # run 10 iterations for each case
    
    for i in range(Niter):
        Obj_state_s.append(opt.best_seen()[1])
        opt.OptIter()
        
    # the optimized objective lens current and the corresponding defocus are returned by opt.best_seen() as elements [0] and [1]
    res = opt.best_seen()
    del mi, gp, opt
    
    return res
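
# Example usage sketch (not from the original code; the lens identifier and the
# starting objective-lens current below are hypothetical placeholders):
best_obj_current, best_defocus = FocusCorrection(lens='S2', obj=1.0)
print('optimized objective lens current:', best_obj_current,
      'predicted defocus:', best_defocus)
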
Example #4
gp_lengthscales = np.array([0.0001])

#gp_precisionmat = 1/np.diag(gp_lengthscales**(2))

gp_precisionmat = np.diag(np.log(1. / gp_lengthscales**(2)))

#2. gp_amp
#gp_amp = 0.1
gp_amp = 10
#3. gp_noise
#gp_noise = 0.0001
gp_noise = 10**(-10)

hyps = [gp_precisionmat, np.log(gp_amp),
        np.log(gp_noise**2)]  #format the hyperparams for the OGP
gp = OGP(ndim, hyps)

#create the bayesian optimizer that will use the gp as the model to optimize the machine
#opt = BayesOpt(gp, mi, acq_func="UCB", start_dev_vals = mi.x, dev_ids = dev_ids)
#mi.setX(-0.004)
#opt = BayesOpt(gp, mi, acq_func="UCB", start_dev_vals = -0.004, dev_ids = dev_ids, bounds=((-np.inf, 0)))

mi.setX(-0.001)
s = mi.getState()
opt = BayesOpt(gp,
               mi,
               acq_func="UCB",
               dev_ids=dev_ids,
               bounds=((-0.00875, 0), ))

opt.ucb_params = [2, None]
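
# The excerpt above stops after configuring the optimizer. A minimal sketch of
# actually running it, following the same pattern as the other examples in this
# listing (Niter and the bookkeeping list are assumptions, not original code):
Niter = 10
Obj_state_s = []
for i in range(Niter):
    Obj_state_s.append(opt.best_seen()[1])  # record the best objective predicted so far
    opt.OptIter()                           # acquire a point, measure it, update the GP
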
Example #5
mi = mi_module.machine_interface(
    dev_ids=dev_ids, start_point=start_point
)  #an isotropic n-dimensional gaussian with amplitude=1, centered at the origin, plus gaussian background noise with std dev = 0.1

#create the gp
ndim = len(dev_ids)
# GP parameters
gp_precisionmat = scan_params['gp_precisionmat']
gp_amp = scan_params['gp_amp']
gp_noise_variance = scan_params['gp_noise']
hyperparams = {
    'precisionMatrix': gp_precisionmat,
    'amplitude_covar': gp_amp,
    'noise_variance': gp_noise_variance
}
gp = OGP(ndim, hyperparams)

#create the bayesian optimizer that will use the gp as the model to optimize the machine
opt = BayesOpt(gp, mi, acq_func="UCB", start_dev_vals=mi.x, dev_ids=dev_ids)
opt.ucb_params = scan_params[
    'ucb_params']  #set the acquisition function parameters
print('ucb_params', opt.ucb_params)

#run the gp search for some number of steps
Obj_state_s = []

optimize_kernel_on_the_fly = None  # iteration number at which to start optimizing the kernel's hyperparameters; if None, the hypers are not optimized during BO

Niter = 10
for i in range(Niter):
    clear_output(wait=True)
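    # Sketch only - the rest of the loop body is not shown in this excerpt. Based
    # on the FocusCorrection example above and on BayesOpt.minimize, a typical
    # iteration would look roughly like the following (assumed, not original code):
    if optimize_kernel_on_the_fly is not None and i > optimize_kernel_on_the_fly:
        opt.optimize_kernel_hyperparameters()   # re-fit the kernel hypers on the fly
    Obj_state_s.append(opt.best_seen()[1])      # track the best objective seen so far
    opt.OptIter()                               # acquire a point, measure it, update the GP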
class BayesOpt:
    """
    Contains the Bayesian optimization class with the following methods:
    acquire(): Returns the point that maximizes the acquisition function.
        For 'testEI', returns the index of the point instead.
        For normal acquisition, currently uses the bounded L-BFGS optimizer.
            Haven't tested alternatives much.
    best_seen(): Uses the model to make predictions at every observed point,
        returning the best-performing (x,y) pair. This is more robust to noise
        than returning the best observation, but could be replaced by other,
        faster methods.
    OptIter(): The main method for Bayesian optimization. Maximizes the
        acquisition function, then uses the interface to test this point and
        update the model.
    """
    
    def __init__(self, model, target_func, acq_func='EI', xi=0.0, alt_param=-1, m=200, bounds=None, iter_bound=False, prior_data=None, start_dev_vals=None, dev_ids=None, searchBoundScaleFactor=None, optimize_kernel_on_the_fly = None, verboseQ=False):
        """        
        Initialization parameters:
        --------------------------
        model: an object with methods 'predict', 'fit', and 'update'
                surrogate model to use
        interface: an object which supplies the state of the system and
            allows for changing the system's x-value.
            Should have methods '(x,y) = intfc.getState()' and 'intfc.setX(x_new)'.
            Note that this interface system is rough, and used for testing and
                as a placeholder for the machine interface.
        acq_func: specifies how the optimizer should choose its next point.
            'PI': uses probability of improvement. The interface should supply y-values.
            'EI': uses expected improvement. The interface should supply y-values.
            'UCB': uses GP upper confidence bound. No y-values needed.
            'testEI': uses EI over a finite set of points. This set must be
                provided as alt_param, and the interface need not supply
                meaningful y-values.
        xi: exploration parameter suggested in some Bayesian opt. literature
        alt_param: currently only used when acq_func=='testEI'
        m: the maximum size of model; can be ignored unless passing an untrained
            SPGP or other model which doesn't already know its own size
        bounds: a tuple of (min,max) tuples specifying search bounds for each
            input dimension. Generally leads to better performance.
            Has a different interpretation when iter_bound is True.
        iter_bound: if True, bounds the distance that can be moved in a single
            iteration in terms of the length scale in each dimension. Uses the
            bounds variable as a multiple of the length scales, so bounds==2
            with iter_bound==True limits movement per iteration to two length
            scales in each dimension. Generally a good idea for safety, etc.
        prior_data: input data to train the model on initially. For convenience,
            since the model can be trained externally as well.
            Assumed to be a pandas DataFrame of shape (n, dim+1) where the last
                column contains y-values.
        optimize_kernel_on_the_fly: if not None, an int giving the iteration number at which to start kernel optimization.
            Currently works for RBF only.
        """
        self.model = model
        self.m = m
        self.bounds = bounds
        self.searchBoundScaleFactor = 1.
        if searchBoundScaleFactor is not None:
            try:
                self.searchBoundScaleFactor = abs(searchBoundScaleFactor)
            except:
                print(('BayesOpt - ERROR: ', searchBoundScaleFactor, ' is not a valid searchBoundScaleFactor (scaling coeff).'))
        self.iter_bound = iter_bound 
        self.prior_data = prior_data # for seeding the GP with data acquired by another optimizer
        self.target_func = target_func
        self.optimize_kernel_on_the_fly = optimize_kernel_on_the_fly 
        self.verboseQ = verboseQ
        if self.optimize_kernel_on_the_fly is not None: print('Run BO w/ kernel optimization on the fly')
        try: 
            self.mi = self.target_func.mi
        except:
            self.mi = self.target_func
        self.acq_func = (acq_func, xi, alt_param)
        #self.ucb_params = [0.24, 0.4] # [nu,delta] worked well for LCLS
        self.ucb_params = [2., None] # if we want to use a fixed scale factor of the standard deviation
        self.max_iter = 100
        self.alpha = 1.0  # controls the ratio of exploration to exploitation in the EI acquisition function
        self.kill = False
        self.ndim = np.array(start_dev_vals).size
        self.multiprocessingQ = multiprocessingQ # speed up acquisition function optimization
        self.dev_ids = dev_ids
        self.start_dev_vals = start_dev_vals
        self.pvs = self.dev_ids
        self.defocus_correction = False

        try:
            # get initial state
            print('Supposed to be grabbing initial machine state...')
            (x_init, y_init) = self.getState()
            print('x_init',x_init)
            print('y_init',y_init)
            self.X_obs = np.array(x_init)
            self.Y_obs = [y_init]
            self.current_x = np.array(np.array(x_init).flatten(), ndmin=2)
        except:
            print('BayesOpt - ERROR: Could not grab initial machine state')
        
        # calculate length scales
        try:
            self.lengthscales = self.model.lengthscales
        except:
            print('WARNING - GP.bayesian_optimization.BayesOpt: Using some unit length scales cause we messed up somehow...')
            self.lengthscales = np.ones(len(self.dev_ids))
        
        # make a copy of the initial params
        self.initial_hyperparams = {}
        self.initial_hyperparams['precisionMatrix'] = np.diag(1./copy.copy(self.lengthscales)**2)
        self.initial_hyperparams['noise_variance'] = copy.copy(self.model.noise_var) 
        self.initial_hyperparams['amplitude_covar'] = copy.copy(self.model.amplitude_covar)
        
        #initiate optimized hypers
        self.hyperparams_opt_all = {}
        self.hyperparams_opt_all['noise_variance'] = [copy.copy(self.model.noise_var)]
        self.hyperparams_opt_all['amplitude_covar'] = [copy.copy(self.model.amplitude_covar)]
        self.hyperparams_opt_all['precisionMatrix'] = [1./copy.copy(self.lengthscales)**2]
        
        if self.verboseQ:
            print('Using prior mean function of ', self.model.prmean)
            print('Using prior mean parameters of ', self.model.prmeanp)
        
        
    def getState(self):
        """
        get current state of the machine
        """
        x_vals, y_val = self.mi.getState()
        return x_vals, y_val

    
    def terminate(self, devices):
        """
        Sets the position back to the location that seems best in hindsight.
        It's a good idea to run this at the end of the optimization, since
        Bayesian optimization tries to explore and might not always end in
        a good place.
        """
        print(("TERMINATE", self.x_best))
        if(self.acq_func[0] == 'EI'):
            # set position back to something reasonable
            for i, dev in enumerate(devices):
                dev.set_value(self.x_best[i])
            #error_func(self.x_best)
        if(self.acq_func[0] == 'UCB'):
            # UCB doesn't keep track of x_best, so find it
            (x_best, y_best) = self.best_seen()
            for i, dev in enumerate(devices):
                dev.set_value(x_best[i])
                
    def sk_kernel(self, hypers_dict):
    
        amp = hypers_dict['amplitude_covar']
        lengthscales = np.diag(hypers_dict['precisionMatrix'])**-0.5
        noise_var = hypers_dict['noise_variance']
        
        se_ard = Ck(amp)*RBF(length_scale=lengthscales, length_scale_bounds=(1e-6,10))
        noise = WhiteKernel(noise_level=noise_var, noise_level_bounds=(1e-9, 1))  # noise terms
        
        sk_kernel = se_ard 
        if self.noiseQ:
            sk_kernel += noise
        t0 = time.time()        
        gpr = GaussianProcessRegressor(kernel=sk_kernel, n_restarts_optimizer=5)
        print("Initial kernel: %s" % gpr.kernel)
        
#         self.ytrain = [y[0][0] for y in self.Y_obs]
        
        gpr.fit(self.X_obs, np.array(self.Y_obs).flatten())
        print('SK fit time is ',time.time() - t0)
        print("Learned kernel: %s" % gpr.kernel_)
        print("Log-marginal-likelihood: %.3f" % gpr.log_marginal_likelihood(gpr.kernel_.theta))
        #print(gpr.kernel_.get_params())
        
        
        if self.noiseQ:
            # RBF w/ noise
            sk_ls = gpr.kernel_.get_params()['k1__k2__length_scale']
            sk_amp = gpr.kernel_.get_params()['k1__k1__constant_value']
            sk_loklik = gpr.log_marginal_likelihood(gpr.kernel_.theta)
            sk_noise = gpr.kernel_.get_params()['k2__noise_level']
        
        else:
            #RBF w/o noise
            sk_ls = gpr.kernel_.get_params()['k2__length_scale']
            sk_amp = gpr.kernel_.get_params()['k1__constant_value']
            sk_loklik = gpr.log_marginal_likelihood(gpr.kernel_.theta)
            sk_noise = 0

        # make dict
        sk_hypers = {}
        sk_hypers['precisionMatrix'] =  np.diag(1./(sk_ls**2)) 
        sk_hypers['noise_variance'] = sk_noise
        sk_hypers['amplitude_covar'] = sk_amp


        return sk_loklik, sk_hypers            
                
    def optimize_kernel_hyperparameters(self, noiseQ = False):
        """
       Optimize the kernel hyperparameters before acuiring the next point.
       This method optimizes the kernel twice - starting from the initial or last hyperparamters.
       Then compares the log likelihood and re-build the GP model using the most likely hypers.
       Note:  sk learn can't deal with matrix. so we can only optimize on lengthscales.
        """
        self.noiseQ = noiseQ
        # optimize kernel using SK learn from initial hyperparams
        print('optimize on initial hyperparams')
        sk_loklik0, sk_hypers0 = self.sk_kernel(self.initial_hyperparams)

        # optimize kernel using SK learn from current hyperparams        
        self.current_hyperparams = {}
        self.current_hyperparams['precisionMatrix'] = np.diag(1./self.model.lengthscales**2)
        self.current_hyperparams['noise_variance'] = self.model.noise_var
        self.current_hyperparams['amplitude_covar'] = self.model.amplitude_covar
                                    
        print('optimize on last hyperparams seen so far')
        sk_loklik, sk_hypers = self.sk_kernel(self.current_hyperparams)

        # compare likelihoods and choose best hyperparams
        if sk_loklik > sk_loklik0:
            hyperparams_opt = sk_hypers 
        else:
            hyperparams_opt  = sk_hypers0
            
        
        for key in hyperparams_opt:
            if key == 'precisionMatrix':
                self.hyperparams_opt_all[key] = np.array(list(chain(self.hyperparams_opt_all[key],                            [hyperparams_opt[key].diagonal()]))) 
            else:
                self.hyperparams_opt_all[key] = list(chain(self.hyperparams_opt_all[key], [hyperparams_opt[key]]))
        

        if self.verboseQ: print('hyperparams_opt ',hyperparams_opt)

        # create new OnlineGP model - overwrites the existing one
        if self.verboseQ: print('sanity dim check: ',self.model.dim == self.X_obs.shape[1])
            
        self.model = OGP(self.model.dim, hyperparams = hyperparams_opt, maxBV = self.model.maxBV, covar = self.model.covar) 
#         ,weighted=self.model.weighted, maxBV=self.model.maxBV) #, prmean=self.model.prmean, prmeanp=self.model.prmeanp, prvar=self.model.prvar, prvarp=self.model.prvarp , proj=self.model.proj,thresh=self.model.thresh, sparsityQ=self.model.sparsityQ, verboseQ=self.model.verboseQ)
        
        
        # initialize new model on current data
        self.model.fit(self.X_obs, np.array(self.Y_obs).flatten(), self.X_obs.shape[0])



    def minimize(self, error_func, x):
        """
        Run the Bayesian optimization loop for up to max_iter iterations,
        minimizing error_func starting from x. self.alpha weights exploration vs
        exploitation in the acquisition; at the end of a scan it goes from 1 to zero.
        """
        inverse_sign = -1
        self.current_x = np.array(np.array(x).flatten(), ndmin=2)
        self.X_obs = np.array(self.current_x)
        self.Y_obs = [np.array([[inverse_sign*error_func(x)]])]
        
        # iterate though the GP method
        for i in range(self.max_iter):
            # get next point to try using acquisition function
            x_next = self.acquire()
            
            if self.optimize_kernel_on_the_fly is not None:
                if i > self.optimize_kernel_on_the_fly:
                    print('****** Optimizing kernel hyperparams')
                    self.optimize_kernel_hyperparameters()    

            y_new = error_func(x_next.flatten())
            if self.opt_ctrl.kill:
                print('WARNING - BayesOpt: Killing Bayesian optimizer...')
                break
            y_new = np.array([[inverse_sign *y_new]])

            # change position of interface
            x_new = deepcopy(x_next)
            self.current_x = x_new

            # add new entry to observed data
            self.X_obs = np.concatenate((self.X_obs, x_new), axis=0)
            self.Y_obs.append(y_new)

            # update the model (may want to add noise if using testEI)
            self.model.update(x_new, y_new)

            
    def OptIter(self,pause=0):
        """
        runs the optimizer for one iteration
        """
        
        # get next point to try using acquisition function
        x_next = self.acquire()
        if(self.acq_func[0] == 'testEI'):
            ind = x_next
            x_next = np.array(self.acq_func[2].iloc[ind,:-1],ndmin=2)    
        
        # change position of interface and get resulting y-value
        self.mi.setX(x_next)
        if(self.acq_func[0] == 'testEI'):
            (x_new, y_new) = (x_next, self.acq_func[2].iloc[ind,-1])
        else:
            (x_new, y_new) = self.mi.getState()

        # fix defocus here if necessary, make sure the saved observation comes from small defocus.
        if self.defocus_correction and self.mi.getDefocus() < 0.85:
            print('Correcting defocus using S2 lens...')
            new_S2 = self.mi.CorrectDefocus(self.mi.x, self.mi.S2)
            self.mi.setS2(new_S2[0][0])
            (x_new, y_new) = self.mi.getState()

        # add new entry to observed data
        self.X_obs = np.concatenate((self.X_obs,x_new),axis=0)
        self.Y_obs.append(y_new)
        
        # update the model (may want to add noise if using testEI)
        self.model.update(x_new, y_new)# + .5*np.random.randn())
        return x_new, y_new
            
            
    def ForcePoint(self,x_next):
        """
        force a point acquisition at our discretion and update the model
        """
        
        # change position of interface and get resulting y-value
        self.mi.setX(x_next)
        if(self.acq_func[0] == 'testEI'):
            (x_new, y_new) = (x_next, self.acq_func[2].iloc[ind,-1])
        else:
            (x_new, y_new) = self.mi.getState()
        # add new entry to observed data
        self.X_obs = np.concatenate((self.X_obs,x_new),axis=0)
        self.Y_obs.append(y_new)
        
        # update the model (may want to add noise if using testEI)
        self.model.update(x_new, y_new)

        
    def best_seen(self):
        """
        Checks the observed points to see which is predicted to be best.
        Probably safer than just returning the maximum observed, since the
        model has noise. It takes longer this way, though; you could
        instead take the model's prediction at the x-value that has
        done best if this needs to be faster.

        Not needed for UCB so do it the fast way (return max obs)
        """
        if(self.acq_func[0] == 'UCB'):
            mu = self.Y_obs
        else:
            (mu, var) = self.model.predict(self.X_obs)
            mu = [self.model.predict(np.array(x,ndmin=2))[0] for x in self.X_obs]

        (ind_best, mu_best) = max(enumerate(mu), key=op.itemgetter(1))
        return (self.X_obs[ind_best], mu_best)

    
    def acquire(self):
        """
        Computes the next point for the optimizer to try by maximizing
        the acquisition function. If movement per iteration is bounded,
        starts search at current position.
        """
        # look from best positions
        (x_best, y_best) = self.best_seen()
        self.x_best = x_best
        x_curr = self.current_x[-1]
        x_start = x_best
            
        ndim = x_curr.size # dimension of the feature space we're searching NEEDED FOR UCB
        try:
            nsteps = 1 + self.X_obs.shape[0] # acquisition number we're on  NEEDED FOR UCB
        except:
            nsteps = 1

        # check to see if this is bounding step sizes
        # print(self.iter_bound)
        if(self.iter_bound):
            if(self.bounds is None): # looks like a scale factor
                self.bounds = 1.0

            bound_lengths = self.searchBoundScaleFactor * 3. * self.lengthscales # 3x hyperparam lengths
            relative_bounds = np.transpose(np.array([-bound_lengths, bound_lengths]))
            
            iter_bounds = np.transpose(np.array([x_start - bound_lengths, x_start + bound_lengths]))

        else:
            iter_bounds = self.bounds
        # print(self.iter_bound, relative_bounds)
  
        # options for finding the peak of the acquisition function:
        optmethod = 'L-BFGS-B' # L-BFGS-B, BFGS, TNC, and SLSQP allow bounds whereas Powell and COBYLA don't
        maxiter = 1000 # max number of steps for one scipy.optimize.minimize call
        try:
            nproc = mp.cpu_count() # number of processes to launch minimizations on
        except:
            nproc = 1
        niter = 1 # max number of starting points for search
        niter_success = 1 # stop the search if the same minimum is found this many times in a row
        tolerance = 1.e-4 # goal tolerance

        # perturb start to break symmetry?
        #x_start += np.random.randn(lengthscales.size)*lengthscales*1e-6

        # probability of improvement acquisition function
        if(self.acq_func[0] == 'PI'):
            aqfcn = negProbImprove
            fargs=(self.model, y_best, self.acq_func[1])

        # expected improvement acquisition function
        elif(self.acq_func[0] == 'EI'):
            aqfcn = negExpImprove
            fargs = (self.model, y_best, self.acq_func[1], self.alpha)

        # gaussian process upper confidence bound acquisition function
        elif(self.acq_func[0] == 'UCB'):
            aqfcn = negUCB
            fargs = (self.model, ndim, nsteps, self.ucb_params[0], self.ucb_params[1])

        # maybe something mitch was using once? (can probably remove)
        elif(self.acq_func[0] == 'testEI'):
            # collect all possible x values
            options = np.array(self.acq_func[2].iloc[:, :-1])
            (x_best, y_best) = self.best_seen()

            # find the option with best EI
            best_option_score = (-1,1e12)
            for i in range(options.shape[0]):
                result = negExpImprove(options[i],self.model,y_best,self.acq_func[1])
                if(result < best_option_score[1]):
                    best_option_score = (i, result)

            # return the index of the best option
            return best_option_score[0]

        else:
            print('WARNING - BayesOpt: Unknown acquisition function.')
            return 0

        try:
            if(self.multiprocessingQ): # multi-processing to speed search

                neval = 2*int(10.*2.**(ndim/12.))
                nkeep = 2*min(8,neval)

                # neval = int(3) 
                # nkeep = int(2)

                nbest = 3 # add the nbest best points seen so far (largest Y_obs)
                nstart = 1 # make sure some early starting points are included to prevent runaway searches
                
                yobs = np.array([y[0][0] for y in self.Y_obs])
                isearch = yobs.argsort()[-nbest:]
                for i in range(min(nstart,len(self.Y_obs))): #
                    if np.sum(isearch == i) == 0: # not found in list
                        isearch = np.append(isearch, i)
                        isearch.sort() # sort to bias searching near earlier steps

                v0s = None
                for i in isearch:
                    # parallelgridsearch generates a pseudo-random grid, then performs an ICDF
                    # transform to map it to a multinormal distribution centered on x_start
                    # with widths given by the hyperparams
                    vs = parallelgridsearch(aqfcn, self.X_obs[i], self.searchBoundScaleFactor * 0.6*self.lengthscales, fargs, neval, nkeep)
                                      
                    if type(v0s) == type(None):
                        v0s = copy.copy(vs)
                    else:
                        v0s = np.vstack((v0s,vs))

                v0sort = v0s[:,-1].argsort()[:nkeep] # keep the nkeep best (smallest negative-acquisition) values
                v0s = v0s[v0sort]
                
                x0s = v0s[:,:-1] # for later testing if the minimize results are better than the best starting point
                v0best = v0s[0]
                
                
                if basinhoppingQ:
                    print("multi-processing, basinhoppingQ")
                    # use basinhopping
                    bkwargs = dict(niter=niter,niter_success=niter_success, minimizer_kwargs={'method':optmethod,'args':fargs,'tol':tolerance,'bounds':iter_bounds,'options':{'maxiter':maxiter}}) # keyword args for basinhopping
                    res = parallelbasinhopping(aqfcn,x0s,bkwargs)
                
                else:
                    print("multi-processing, minimize")
                    # use minimize
                    mkwargs = dict(bounds=iter_bounds, method=optmethod, options={'maxiter':maxiter}, tol=tolerance) # keyword args for scipy.optimize.minimize
                    res = parallelminimize(aqfcn,x0s,fargs,mkwargs,v0best,relative_bounds=relative_bounds)

            else: # single-processing

                if basinhoppingQ:
                    print("single-processing, basinhoppingQ")
                    res = basinhopping(aqfcn, x_start,niter=niter,niter_success=niter_success, minimizer_kwargs={'method':optmethod,'args':(self.model, y_best, self.acq_func[1], self.alpha),'tol':tolerance,'bounds':iter_bounds,'options':{'maxiter':maxiter}})

                else:
                    print("single-processing, minimize")
                    # res = minimize(aqfcn, x_start, args=(self.model, y_best, self.acq_func[1], self.alpha), method=optmethod,tol=tolerance,bounds=iter_bounds,options={'maxiter':maxiter})
                    res = minimize(aqfcn, x_start, args=fargs, method=optmethod,tol=tolerance,bounds=iter_bounds,options={'maxiter':maxiter})
                res = res.x
                
        except:
            raise
        return np.array(res,ndmin=2) # return resulting x value as a (1 x dim) vector
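
# Note: the acquisition helpers used above (negProbImprove, negExpImprove, negUCB,
# parallelgridsearch, parallelminimize, parallelbasinhopping, basinhopping) are
# imported from elsewhere in this package and are not shown in this listing.
# A minimal sketch of what negUCB could look like, assuming the
# (x, model, ndim, nsteps, nu, delta) call signature used in acquire() and the
# usual GP-UCB form -- an illustration, not the package's actual implementation:
import numpy as np

def negUCB_sketch(x, model, ndim, nsteps, nu, delta):
    """Negative upper confidence bound of the GP at x (minimized by scipy.optimize)."""
    mu, var = model.predict(np.array(x, ndmin=2))
    sigma = np.sqrt(var)
    if delta is None:
        beta = nu  # fixed scale factor of the standard deviation, e.g. ucb_params = [2., None]
    else:
        # a GP-UCB style schedule that grows slowly with the acquisition number
        beta = np.sqrt(nu * 2. * np.log(ndim * nsteps**2 * np.pi**2 / (6. * delta)))
    return -float(mu + beta * sigma)
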
Example #7
class BayesOpt:
    def __init__(self,
                 model,
                 target_func,
                 acq_func='EI',
                 xi=0.0,
                 alt_param=-1,
                 m=200,
                 bounds=None,
                 iter_bound=False,
                 prior_data=None,
                 start_dev_vals=None,
                 dev_ids=None,
                 searchBoundScaleFactor=None):
        self.model = model
        self.m = m
        self.bounds = bounds
        self.searchBoundScaleFactor = 1.
        if searchBoundScaleFactor is not None:
            try:
                self.searchBoundScaleFactor = abs(searchBoundScaleFactor)
            except:
                print((
                    'BayesOpt - ERROR: ', searchBoundScaleFactor,
                    ' is not a valid searchBoundScaleFactor (scaling coeff).'))
        self.iter_bound = iter_bound
        self.prior_data = prior_data  # for seeding the GP with data acquired by another optimizer
        self.target_func = target_func
        print('target_func = ', target_func)
        try:
            self.mi = self.target_func.mi
            print('********* BO - self.mi = self.target_func.mi WORKED!')
        except:
            self.mi = self.target_func
            print('********* BO - self.mi = self.target_func WORKED!')
        self.acq_func = (acq_func, xi, alt_param)
        ## the nus in these here should be increased by a factor of npts_per_sample if using standard error of the mean as noise param
        ##self.ucb_params = [0.01, 2.] # [nu,delta]
        #self.ucb_params = [0.002, 0.4] # [nu,delta] we like
        ##self.ucb_params = [0.007, 1.0] # [nu,delta]
        # the nus in these here should be used with the standard error of the mean
        #self.ucb_params = [0.12, 2.] # [nu,delta]
        #        self.ucb_params = [0.24, 0.4] # [nu,delta] we like
        #self.ucb_params = [0.84, 1.0] # [nu,delta]
        self.ucb_params = [
            2., None
        ]  # if we want to use a fixed scale factor of the standard deviation
        self.max_iter = 100
        self.check = None
        self.alpha = 1
        self.kill = False
        self.ndim = np.array(start_dev_vals).size
        self.multiprocessingQ = multiprocessingQ  # speed up acquisition function optimization

        #Post-edit
        if self.mi.name == 'MultinormalInterface':
            self.dev_ids = self.mi.pvs[:-1]  # last pv is objective
            self.start_dev_vals = self.mi.x
        else:
            self.dev_ids = dev_ids
            self.start_dev_vals = start_dev_vals
        self.pvs = self.dev_ids
        self.pvs_ = [pv.replace(":", "_") for pv in self.pvs]

        try:
            # get initial state
            (x_init, y_init) = self.getState()
            print('Supposed to be grabbing machine state...')
            print('x_init', x_init)
            print('y_init', y_init)
            self.X_obs = np.array(x_init)
            self.Y_obs = [y_init]
            self.current_x = np.array(np.array(x_init).flatten(), ndmin=2)
        except:
            print('BayesOpt - ERROR: Could not grab initial machine state')

        # calculate length scales
        try:
            # length scales from covar params
            cp = self.model.covar_params[0]
            cps = np.shape(cp)
            lengthscales = np.sqrt(1. / np.exp(cp))
            if np.size(cps) == 2:
                if cps[0] < cps[1]:  # vector of lengths
                    self.lengthscales = lengthscales.flatten()
                else:  # matrix of lengths
                    self.lengthscales = np.diag(lengthscales)
        except:
            print(
                'WARNING - GP.bayesian_optimization.BayesOpt: Using some unit length scales cause we messed up somehow...'
            )
            self.lengthscales = np.ones(len(self.dev_ids))

        # make a copy of the initial params
        self.initial_hyperparams = {}
        self.initial_hyperparams['length scales'] = copy.copy(
            self.lengthscales)
        self.initial_hyperparams[
            'noise variance'] = self.model.noise_var  #np.exp(self.model.covar_params[2])
        self.initial_hyperparams['covar amplitude'] = np.exp(
            self.model.covar_params[1])

        ## initialize the prior
        #self.model.prmean = None # prior mean fcn
        #self.model.prmeanp = None # params of prmean fcn
        #self.model.prvar = None
        #self.model.prvarp = None
        #self.model.prmean_name = ''

    def getState(self):
        #print('>>>>>>>> getState')
        #x_vals = [self.mi.get_value(d) for d in self.dev_ids]
        #print('>>>>>>>>>>>>>>>>>>>> invoking get_penalty')
        #y_val = -self.target_func.get_penalty()
        #print(y_val)
        #print('>>>>>>>>>>>>> getState returning')

        #Note: Dylan edited this function on 2019-08-30 for use with his simple_machine_interface class by commenting out the lines above and replacing them with the line immediately below
        x_vals, y_val = self.mi.getState()
        return x_vals, y_val

    def terminate(self, devices):
        """
        Sets the position back to the location that seems best in hindsight.
        It's a good idea to run this at the end of the optimization, since
        Bayesian optimization tries to explore and might not always end in
        a good place.
        """
        print(("TERMINATE", self.x_best))
        if (self.acq_func[0] == 'EI'):
            # set position back to something reasonable
            for i, dev in enumerate(devices):
                dev.set_value(self.x_best[i])
            #error_func(self.x_best)
        if (self.acq_func[0] == 'UCB'):
            # UCB doesn't keep track of x_best, so find it
            (x_best, y_best) = self.best_seen()
            for i, dev in enumerate(devices):
                dev.set_value(x_best[i])

    def minimize(self, error_func, x):
        # weighting for exploration vs exploitation in the GP at the end of scan, alpha array goes from 1 to zero
        inverse_sign = -1
        self.current_x = np.array(np.array(x).flatten(), ndmin=2)
        self.X_obs = np.array(self.current_x)
        self.Y_obs = [np.array([[inverse_sign * error_func(x)]])]
        # iterate though the GP method
        for i in range(self.max_iter):
            print('<><><><><><><><> iter number {} <><><><><><><<<'.format(i))
            if (i > 2):  # and (np.mod(i,3) == 0):
                print('****** Optimizing kernel hyperparams')
                self.optimize_log_lik()
            # get next point to try using acquisition function
            x_next = self.acquire(self.alpha)
            # check for problems with the beam
            if self.check != None: self.check.errorCheck()

            y_new = error_func(x_next.flatten())
            if self.opt_ctrl.kill:
                print('WARNING - BayesOpt: Killing Bayesian optimizer...')
                break
            y_new = np.array([[inverse_sign * y_new]])

            # change position of interface
            x_new = deepcopy(x_next)
            self.current_x = x_new

            # add new entry to observed data
            self.X_obs = np.concatenate((self.X_obs, x_new), axis=0)
            self.Y_obs.append(y_new)

            # update the model (may want to add noise if using testEI)
            self.model.update(x_new, y_new)

    def OptIter(self, pause=0):
        # runs the optimizer for one iteration

        # get next point to try using acquisition function
        x_next = self.acquire()
        if (self.acq_func[0] == 'testEI'):
            ind = x_next
            x_next = np.array(self.acq_func[2].iloc[ind, :-1], ndmin=2)

        # change position of interface and get resulting y-value
        self.mi.setX(x_next)
        if (self.acq_func[0] == 'testEI'):
            (x_new, y_new) = (x_next, self.acq_func[2].iloc[ind, -1])
        else:
            (x_new, y_new) = self.mi.getState()
        # add new entry to observed data
        self.X_obs = np.concatenate((self.X_obs, x_new), axis=0)
        self.Y_obs.append(y_new)

        # update the model (may want to add noise if using testEI)
        self.model.update(x_new, y_new)  # + .5*np.random.randn())

    def ForcePoint(self, x_next):
        # force a point acquisition at our discretion and update the model

        # change position of interface and get resulting y-value
        self.mi.setX(x_next)
        if (self.acq_func[0] == 'testEI'):
            (x_new, y_new) = (x_next, self.acq_func[2].iloc[ind, -1])
        else:
            (x_new, y_new) = self.mi.getState()
        # add new entry to observed data
        self.X_obs = np.concatenate((self.X_obs, x_new), axis=0)
        self.Y_obs.append(y_new)

        # update the model (may want to add noise if using testEI)
        self.model.update(x_new, y_new)

    def best_seen(self):
        """
        Checks the observed points to see which is predicted to be best.
        Probably safer than just returning the maximum observed, since the
        model has noise. It takes longer this way, though; you could
        instead take the model's prediction at the x-value that has
        done best if this needs to be faster.

        Not needed for UCB so do it the fast way (return max obs)
        """
        if (self.acq_func[0] == 'UCB'):
            mu = self.Y_obs
        else:
            (mu, var) = self.model.predict(self.X_obs)
            mu = [
                self.model.predict(np.array(x, ndmin=2))[0] for x in self.X_obs
            ]

        (ind_best, mu_best) = max(enumerate(mu), key=op.itemgetter(1))
        return (self.X_obs[ind_best], mu_best)

    def sk_kernel(self, amp, noise_var,
                  lengthscales):  # amp and noise_var are variances (not standard deviations)
        #print('amp',amp,'noise_var',noise_var,'lengthscales',lengthscales)
        se_ard = Ck(amp) * RBF(length_scale=lengthscales,
                               length_scale_bounds=(1e-6, 20))
        noise = WhiteKernel(noise_level=noise_var,
                            noise_level_bounds=(1e-9, 100))  # noise terms
        # sk_kernel = se_ard + noise + Ck(0.4) #with bias
        sk_kernel = se_ard + noise
        t0 = time.time()
        gpr = GaussianProcessRegressor(kernel=sk_kernel,
                                       n_restarts_optimizer=5)
        print("Initial kernel: %s" % gpr.kernel)
        self.ytrain = [y[0][0] for y in self.Y_obs]
        gpr.fit(self.X_obs, self.ytrain)
        print('SK fit time is ', time.time() - t0)
        print("Learned kernel: %s" % gpr.kernel_)
        print("Log-marginal-likelihood: %.3f" %
              gpr.log_marginal_likelihood(gpr.kernel_.theta))
        #print(gpr.kernel_.get_params())

        sk_ls = gpr.kernel_.get_params()['k1__k2__length_scale']
        sk_noise = gpr.kernel_.get_params()['k2__noise_level']
        sk_amp = gpr.kernel_.get_params()['k1__k1__constant_value']
        sk_loklik = gpr.log_marginal_likelihood(gpr.kernel_.theta)

        #        #if bias is included use this:
        #        sk_ls = gpr.kernel_.get_params()['k1__k1__k2__length_scale']
        #        sk_noise = gpr.kernel_.get_params()['k1__k2__noise_level']
        #        sk_amp = gpr.kernel_.get_params()['k1__k1__k1__constant_value']
        #        sk_loklik = gpr.log_marginal_likelihood(gpr.kernel_.theta)

        return sk_loklik, sk_amp, sk_noise, sk_ls

    def optimize_log_lik(self):
        """
       Optimize the kernel hyperparameters before acuiring the next point
        """

        # initial hyperparams
        lengthscales = self.initial_hyperparams['length scales']
        noise_var = self.initial_hyperparams['noise variance']
        amp = self.initial_hyperparams['covar amplitude']

        # optimize hyperparams using scikit-learn, starting from the initial hyperparams
        sk_loklik0, sk_amp0, sk_noise0, sk_ls0 = self.sk_kernel(
            amp, noise_var, lengthscales)

        # default packing for the online GP code, used when this is the first step
        hyperparams_opt = ((np.diag(1. / (sk_ls0**2))), np.log(sk_amp0),
                           np.log(sk_noise0))

        if self.X_obs.shape[0] > 1:  # don't run twice on the first step
            # last hyperparams seen so far
            current_covar = np.sqrt(
                1. / np.exp(self.model.covar_params[0])
            )  # this gives a full matrix, not just length scales; the
            # scikit-learn kernel can't handle a full matrix,
            current_lengthscales = np.diag(
                current_covar)  # so take only the diagonal part - the length scales
            current_covar_amp = np.exp(self.model.covar_params[1])
            current_noise_variance = self.model.noise_var

            # repeat the scikit-learn optimization from the current hyperparams
            print('optimize on last hyperparams seen so far')
            sk_loklik, sk_amp, sk_noise, sk_ls = self.sk_kernel(
                current_covar_amp, current_noise_variance,
                current_lengthscales)

            # compare likelihoods and choose the best hyperparams
            if sk_loklik > sk_loklik0:
                hyperparams_opt = (
                    (np.diag(1. / (sk_ls**2))), np.log(sk_amp),
                    np.log(sk_noise)
                )  #this is the required packing for the online gp code
            else:
                hyperparams_opt = ((np.diag(1. / (sk_ls0**2))),
                                   np.log(sk_amp0), np.log(sk_noise0))
            #print(hyperparams_opt)

        # create new OnlineGP model - overwrites the existing one
        print('sanity dim check: ', self.model.nin == self.X_obs.shape[1])
        self.model = OGP(
            self.model.nin,
            hyperparams=hyperparams_opt,
            maxBV=self.model.maxBV,
            covar=self.model.covar
        )  #, weighted=self.model.weighted,maxBV=self.model.maxBV, prmean=self.model.prmean, prmeanp=self.model.prmeanp, prvar=self.model.prvar, prvarp=self.model.prvarp,proj=self.model.proj,thresh=self.model.thresh, sparsityQ=self.model.sparsityQ)

        # initialize model on current data
        p_X = self.X_obs
        p_Y = self.ytrain
        num = p_X.shape[0]
        self.model.fit(p_X, p_Y, num)

    def acquire(self, alpha=1.):
        """
        Computes the next point for the optimizer to try by maximizing
        the acquisition function. If movement per iteration is bounded,
        starts search at current position.
        """
        # look from best positions
        (x_best, y_best) = self.best_seen()
        self.x_best = x_best
        x_curr = self.current_x[-1]
        x_start = x_best

        ndim = x_curr.size  # dimension of the feature space we're searching NEEDED FOR UCB
        try:
            nsteps = 1 + self.X_obs.shape[
                0]  # acquisition number we're on  NEEDED FOR UCB
        except:
            nsteps = 1

        # check to see if this is bounding step sizes
        if (self.iter_bound or True):  # note: 'or True' means step sizes are always bounded in this version
            if (self.bounds is None):  # looks like a scale factor
                self.bounds = 1.0

            bound_lengths = self.searchBoundScaleFactor * 3. * self.lengthscales  # 3x hyperparam lengths
            relative_bounds = np.transpose(
                np.array([-bound_lengths, bound_lengths]))

            #iter_bounds = np.transpose(np.array([x_start - bound_lengths, x_start + bound_lengths]))
            iter_bounds = np.transpose(
                np.array([x_start - bound_lengths, x_start + bound_lengths]))

        else:
            iter_bounds = self.bounds

        # options for finding the peak of the acquisition function:
        optmethod = 'L-BFGS-B'  # L-BFGS-B, BFGS, TNC, and SLSQP allow bounds whereas Powell and COBYLA don't
        maxiter = 1000  # max number of steps for one scipy.optimize.minimize call
        try:
            nproc = mp.cpu_count(
            )  # number of processes to launch minimizations on
        except:
            nproc = 1
        niter = 1  # max number of starting points for search
        niter_success = 1  # stop the search if the same minimum is found this many times in a row
        tolerance = 1.e-4  # goal tolerance

        # perturb start to break symmetry?
        #x_start += np.random.randn(lengthscales.size)*lengthscales*1e-6

        # probability of improvement acquisition function
        if (self.acq_func[0] == 'PI'):
            aqfcn = negProbImprove
            fargs = (self.model, y_best, self.acq_func[1])

        # expected improvement acquisition function
        elif (self.acq_func[0] == 'EI'):
            aqfcn = negExpImprove
            fargs = (self.model, y_best, self.acq_func[1], alpha)

        # gaussian process upper confidence bound acquisition function
        elif (self.acq_func[0] == 'UCB'):
            aqfcn = negUCB
            fargs = (self.model, ndim, nsteps, self.ucb_params[0],
                     self.ucb_params[1])

        # maybe something mitch was using once? (can probably remove)
        elif (self.acq_func[0] == 'testEI'):
            # collect all possible x values
            options = np.array(self.acq_func[2].iloc[:, :-1])
            (x_best, y_best) = self.best_seen()

            # find the option with best EI
            best_option_score = (-1, 1e12)
            for i in range(options.shape[0]):
                result = negExpImprove(options[i], self.model, y_best,
                                       self.acq_func[1])
                if (result < best_option_score[1]):
                    best_option_score = (i, result)

            # return the index of the best option
            return best_option_score[0]

        else:
            print('WARNING - BayesOpt: Unknown acquisition function.')
            return 0

        try:

            if (self.multiprocessingQ):  # multi-processing to speed search

                neval = 2 * int(10. * 2.**(ndim / 12.))
                nkeep = 2 * min(8, neval)

                # parallelgridsearch generates a pseudo-random grid, then performs an ICDF transform
                # to map it to a multinormal distribution centered on x_start with widths given by the hyperparams

                nbest = 3  # add the nbest best points seen so far (largest Y_obs)
                nstart = 2  # make sure some early starting points are included to prevent runaway searches

                yobs = np.array([y[0][0] for y in self.Y_obs])
                isearch = yobs.argsort()[-nbest:]
                for i in range(min(nstart, len(self.Y_obs))):  #
                    if np.sum(isearch == i) == 0:  # not found in list
                        isearch = np.append(isearch, i)
                        isearch.sort(
                        )  # sort to bias searching near earlier steps

                v0s = None

                for i in isearch:

                    vs = parallelgridsearch(
                        aqfcn, self.X_obs[i],
                        self.searchBoundScaleFactor * 0.6 * self.lengthscales,
                        fargs, neval, nkeep)

                    if type(v0s) == type(None):
                        v0s = copy.copy(vs)
                    else:
                        v0s = np.vstack((v0s, vs))

                v0sort = v0s[:, -1].argsort()[:nkeep]  # keep the nkeep best (smallest negative-acquisition) values
                v0s = v0s[v0sort]

                x0s = v0s[:, :
                          -1]  # for later testing if the minimize results are better than the best starting point
                v0best = v0s[0]

                if basinhoppingQ:
                    # use basinhopping
                    bkwargs = dict(niter=niter,
                                   niter_success=niter_success,
                                   minimizer_kwargs={
                                       'method': optmethod,
                                       'args': fargs,
                                       'tol': tolerance,
                                       'bounds': iter_bounds,
                                       'options': {
                                           'maxiter': maxiter
                                       }
                                   })  # keyword args for basinhopping
                    res = parallelbasinhopping(aqfcn, x0s, bkwargs)

                else:
                    # use minimize
                    mkwargs = dict(
                        bounds=iter_bounds,
                        method=optmethod,
                        options={'maxiter': maxiter},
                        tol=tolerance
                    )  # keyword args for scipy.optimize.minimize
                    res = parallelminimize(aqfcn,
                                           x0s,
                                           fargs,
                                           mkwargs,
                                           v0best,
                                           relative_bounds=relative_bounds)

            else:  # single-processing

                if basinhoppingQ:
                    res = basinhopping(aqfcn,
                                       x_start,
                                       niter=niter,
                                       niter_success=niter_success,
                                       minimizer_kwargs={
                                           'method':
                                           optmethod,
                                           'args': (self.model, y_best,
                                                    self.acq_func[1], alpha),
                                           'tol':
                                           tolerance,
                                           'bounds':
                                           iter_bounds,
                                           'options': {
                                               'maxiter': maxiter
                                           }
                                       })

                else:
                    res = minimize(aqfcn,
                                   x_start,
                                   args=(self.model, y_best, self.acq_func[1],
                                         alpha),
                                   method=optmethod,
                                   tol=tolerance,
                                   bounds=iter_bounds,
                                   options={'maxiter': maxiter})

                res = res.x

        except:
            raise
        return np.array(
            res, ndmin=2)  # return resulting x value as a (1 x dim) vector
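
# parallelgridsearch is imported from a helper module and is not shown in this
# listing. Based on the comment in acquire() ("generates a pseudo-random grid, then
# performs an ICDF transform to map it to a multinormal distribution centered on
# x_start with widths given by the hyperparams"), a simplified single-process sketch
# might look like the following -- an assumption for illustration, not the real
# helper, which also parallelizes the acquisition-function evaluations:
import numpy as np
from scipy.stats import norm

def gridsearch_sketch(aqfcn, x_start, widths, fargs, neval, nkeep):
    ndim = np.size(x_start)
    u = np.random.uniform(size=(neval, ndim))            # pseudo-random grid on the unit cube
    xs = np.array(x_start).flatten() + np.asarray(widths) * norm.ppf(u)  # ICDF -> N(x_start, diag(widths**2))
    vals = np.array([float(aqfcn(x, *fargs)) for x in xs])  # evaluate the (negative) acquisition
    keep = vals.argsort()[:nkeep]                         # keep the nkeep best candidates
    return np.column_stack((xs[keep], vals[keep]))        # rows of (x..., value), like v0s in acquire()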