def predict(self, z):
    """Evaluate the trained Gaussian process at the query points ``z``.

    The input/output scalers and the scaled training data are rebuilt from
    ``self.training_set`` and ``self.training_fitness`` on every call, so a
    regressor that was reloaded (rather than retrained) can still predict.

    Args:
        z: Query points; converted with ``array(z)``. When the result has
            exactly one row, that row is duplicated before calling ``gp``.

    Returns:
        ``(None, None, None, None)`` when ``self.hyp`` is ``None`` or when
        the ``gp`` call raises (the error is logged).

        NOTE(review): after a successful ``gp`` call this block falls off
        the end and therefore returns ``None``; the raw ``gp`` output and
        the single-row flag are not post-processed here. The error paths
        return a 4-tuple, so the success path looks incomplete -- confirm
        against the intended contract before relying on it.
    """
    if self.hyp is None:
        logging.error('Train GP before using it!!')
        return None, None, None, None

    z = array(z)  # ensure a consistent input format

    # For some reason a single query row is not predicted properly, so the
    # row is fed to gp() twice.
    shape_was_one = z.shape[0] == 1
    if shape_was_one:
        z = array([z[0], z[0]])

    # Re-fit the scalers from the stored training data; this is what allows
    # a reloaded regressor to be used without retraining the model.
    self.input_scaler = preprocessing.StandardScaler().fit(self.training_set)
    if self.transLog:
        targets = log(self.training_fitness - self.shift_by())
    else:
        targets = self.training_fitness
    # with_std=False: the targets are only centred, not rescaled.
    self.output_scaler = preprocessing.StandardScaler(with_std=False).fit(targets)
    self.adjusted_training_fitness = self.output_scaler.transform(targets)
    self.scaled_training_set = self.input_scaler.transform(self.training_set)

    # Do the predictions.
    try:
        vargout = gp(self.hyp, self.inffunc, self.meanfunc, self.covfunc,
                     self.likfunc, self.scaled_training_set,
                     self.adjusted_training_fitness,
                     self.input_scaler.transform(z))
    except Exception as e:
        logging.error(str(e))
        return None, None, None, None
    # NOTE(review): `vargout` and `shape_was_one` are not consumed past this
    # point in the visible code (see the docstring).
def train_cross(self):
    """Pick GP hyper-parameters by random restarts scored on held-out splits.

    For each of ``conf.random_start`` restarts the covariance
    hyper-parameters are drawn at random (or, with probability 0.5 when
    ``self.hyp`` is set, the previous hyper-parameters are reused as the
    starting point), optimised with ``min_wrapper(..., 'BFGS', ...)`` and
    scored by summing ``self.calc_press`` over the ``ShuffleSplit``
    train/test splits. The restart with the lowest score wins and is stored
    in ``self.hyp``. Restarts that raise are logged at debug level and
    skipped.

    Side effects: sets ``self.covfunc``, ``self.input_scaler``,
    ``self.scaled_training_set``, ``self.output_scaler``,
    ``self.adjusted_training_fitness`` and, when at least one restart
    succeeded, ``self.hyp``.

    Returns:
        ``False`` when ``conf.corr`` is not a supported kernel name (checked
        before any state is modified).

        NOTE(review): every other path falls off the end and returns
        ``None``; callers cannot tell success from failure by truthiness.
    """
    from copy import deepcopy

    conf = self.conf
    dimensions = len(self.training_set[0])

    # corr name -> (kernel terms that are summed with covNoise,
    #               number of randomly initialised cov entries,
    #               fixed cov entries appended after the random ones)
    kernel_specs = {
        "isotropic": (['kernels.covSEiso'], 2, []),
        "anisotropic": (['kernels.covSEard'], dimensions + 1, []),
        "anirat": (['kernels.covRQard'], dimensions + 2, []),  # marked "todo" originally
        "matern3": (['kernels.covMatern'], 2, [log(3)]),
        "matern5": (['kernels.covMatern'], 2, [log(5)]),
        "rqard": (['kernels.covRQard'], dimensions + 2, []),
        "special": (['kernels.covSEiso', 'kernels.covMatern'], 4, [log(3)]),
    }
    if conf.corr not in kernel_specs:
        logging.error("The specified kernel function is not supported")
        return False
    kernel_terms, n_random, fixed_tail = kernel_specs[conf.corr]
    self.covfunc = [['kernels.covSum'],
                    [[term] for term in kernel_terms] + [['kernels.covNoise']]]

    # Scale the inputs.
    self.input_scaler = preprocessing.StandardScaler().fit(self.training_set)
    self.scaled_training_set = self.input_scaler.transform(self.training_set)

    # Centre the (optionally log-transformed) training targets.
    if self.transLog:
        targets = log(self.training_fitness - self.shift_by())
    else:
        targets = self.training_fitness
    self.output_scaler = preprocessing.StandardScaler(with_std=False).fit(targets)
    self.adjusted_training_fitness = self.output_scaler.transform(targets)

    # 80% of the examples train each split, 20% evaluate it.
    n_iters = len(self.training_set) * 5
    index_array = ShuffleSplit(len(self.scaled_training_set), n_iter=n_iters,
                               train_size=0.8, test_size=0.2)

    sn = 0.001
    press_best = None
    best_hyp = None
    attempt = 0
    while attempt < conf.random_start:
        # A fresh object per restart: reusing one instance let the next
        # restart's random initialisation overwrite the stored best.
        hyp = hyperParameters()
        hyp.lik = array([log(sn)])
        hyp.mean = array([])
        hyp.cov = [log(uniform(low=conf.thetaL, high=conf.thetaU))
                   for _ in range(n_random)]
        hyp.cov.extend(fixed_tail)
        # Last entry belongs to the covNoise term.
        hyp.cov.append(log(uniform(low=0.01, high=0.5)))

        # 50% probability to start from the previous best hyper-parameters;
        # work on a copy so self.hyp is never modified mid-search.
        if self.hyp:
            if uniform(0., 1.) < 0.5:
                hyp = deepcopy(self.hyp)

        try:
            vargout = min_wrapper(hyp, gp, 'BFGS', self.inffunc, self.meanfunc,
                                  self.covfunc, self.likfunc,
                                  self.scaled_training_set,
                                  self.adjusted_training_fitness,
                                  None, None, True)
            hyp = vargout[0]

            # No sanity filter on the optimised hyper-parameters is applied
            # here (the original check was disabled).
            press = 0.0
            for train_indexes, test_indexes in index_array:
                fold_out = gp(hyp, self.inffunc, self.meanfunc, self.covfunc,
                              self.likfunc,
                              self.scaled_training_set[train_indexes],
                              self.adjusted_training_fitness[train_indexes],
                              self.scaled_training_set[test_indexes])
                press = press + self.calc_press(
                    fold_out[2], self.adjusted_training_fitness[test_indexes])

            logging.info("Press " + str(press) + " " + str(hyp.cov))
            # `is None` rather than truthiness: a score of 0.0 is a valid best.
            if press_best is None or press < press_best:
                best_hyp = hyp
                press_best = press
        except Exception as e:
            logging.debug("Regressor training Failed: " + str(e))
        attempt = attempt + 1

    # Keep the winner; previously it was computed and then discarded.
    if best_hyp is not None:
        self.hyp = best_hyp
def train_nlml(self):
    """Pick GP hyper-parameters by random restarts scored with ``gp``.

    Each restart draws random covariance hyper-parameters, optimises them
    with ``min_wrapper(..., 'BFGS', ...)`` and scores the result with the
    first element returned by ``gp(..., None, None, False)`` (called
    ``nlml`` here; presumably the negative log marginal likelihood --
    confirm against the gp library). Lower is better. The best in-spec
    result is stored in ``self.hyp``.

    A restart only counts towards ``conf.random_start`` when optimisation
    succeeded and the result is "in spec":
      * the last cov entry (the covNoise term) is below -1, i.e. the
        function is not explained by noise alone;
      * for "matern3", ``hyp.cov[0]`` is not negative (avoid over-fitting);
      * for "anisotropic", not all of ``hyp.cov[:-2]`` are negative (at
        least one parameter has to have some meaning).

    NOTE(review): failed or out-of-spec restarts are retried without any
    upper bound, so this loops forever if no restart can ever be accepted.

    Side effects: sets ``self.covfunc``, ``self.input_scaler``,
    ``self.scaled_training_set``, ``self.output_scaler``,
    ``self.adjusted_training_fitness`` and ``self.hyp``.

    Returns:
        ``False`` when ``conf.corr`` is not a supported kernel name (checked
        before any state is modified); otherwise falls off the end and
        returns ``None``.
    """
    conf = self.conf
    dimensions = len(self.training_set[0])

    # corr name -> (kernel terms that are summed with covNoise,
    #               number of randomly initialised cov entries,
    #               fixed cov entries appended after the random ones)
    kernel_specs = {
        "isotropic": (['kernels.covSEiso'], 2, []),
        "anisotropic": (['kernels.covSEard'], dimensions + 1, []),
        "anirat": (['kernels.covRQard'], dimensions + 2, []),  # marked "todo" originally
        "matern3": (['kernels.covMatern'], 2, [log(3)]),
        "matern5": (['kernels.covMatern'], 2, [log(5)]),
        "rqard": (['kernels.covRQard'], dimensions + 2, []),
        "special": (['kernels.covSEiso', 'kernels.covMatern'], 4, [log(3)]),
    }
    if conf.corr not in kernel_specs:
        logging.error("The specified kernel function is not supported")
        return False
    kernel_terms, n_random, fixed_tail = kernel_specs[conf.corr]
    self.covfunc = [['kernels.covSum'],
                    [[term] for term in kernel_terms] + [['kernels.covNoise']]]

    # Scale the inputs.
    self.input_scaler = preprocessing.StandardScaler().fit(self.training_set)
    self.scaled_training_set = self.input_scaler.transform(self.training_set)

    # Centre the training targets. The log transform is applied only when
    # self.transLog is set, so the hyper-parameters are fitted on the same
    # target scale that the transLog-aware code paths of this class use
    # (it used to be applied unconditionally here).
    if self.transLog:
        targets = log(self.training_fitness - self.shift_by())
    else:
        targets = self.training_fitness
    self.output_scaler = preprocessing.StandardScaler(with_std=False).fit(targets)
    self.adjusted_training_fitness = self.output_scaler.transform(targets)

    sn = 0.001
    nlml_best = None
    accepted = 0
    while accepted < conf.random_start:
        # A fresh object per restart: reusing one instance let the next
        # restart's random initialisation overwrite the stored self.hyp.
        hyp = hyperParameters()
        hyp.lik = array([log(sn)])
        hyp.mean = array([])
        hyp.cov = [log(uniform(low=conf.thetaL, high=conf.thetaU))
                   for _ in range(n_random)]
        hyp.cov.extend(fixed_tail)
        # Last entry belongs to the covNoise term.
        hyp.cov.append(log(uniform(low=0.1, high=1.0)))

        counted = False
        try:
            vargout = min_wrapper(hyp, gp, 'BFGS', self.inffunc, self.meanfunc,
                                  self.covfunc, self.likfunc,
                                  self.scaled_training_set,
                                  self.adjusted_training_fitness,
                                  None, None, True)
            hyp = vargout[0]
            vargout = gp(hyp, self.inffunc, self.meanfunc, self.covfunc,
                         self.likfunc, self.scaled_training_set,
                         self.adjusted_training_fitness, None, None, False)
            nlml = vargout[0]

            # Sanity checks on the optimised hyper-parameters (see docstring).
            if ((hyp.cov[-1] < -1.)
                    and not ((conf.corr == "matern3") and hyp.cov[0] < 0.0)
                    and not ((conf.corr == "anisotropic")
                             and all(hyp.cov[:-2] < 0.0))):
                logging.info(str(nlml) + " " + str(hyp.cov))
                # `is None` rather than truthiness: 0.0 is a valid best.
                if nlml_best is None or nlml < nlml_best:
                    self.hyp = hyp
                    nlml_best = nlml
                counted = True
            else:
                logging.info("hyper parameter out of spec: " + str(nlml) + " "
                             + str(hyp.cov) + " " + str(hyp.cov[-1]))
        except Exception as e:
            logging.debug("Regressor training Failed: " + str(e))

        # Failed and out-of-spec restarts are repeated, not counted.
        if counted:
            accepted = accepted + 1