def setUp(self):
    eps = 1e-8
    xlimits = np.asarray([[0, 1], [0, 1]])
    self.random = np.random.RandomState(42)
    lhs = LHS(xlimits=xlimits, random_state=self.random)
    X = lhs(8)
    y = LHS(xlimits=np.asarray([[0, 1]]), random_state=self.random)(8)

    X_norma, y_norma, X_offset, y_mean, X_scale, y_std = standardization(X, y)
    D, ij = cross_distances(X_norma)
    theta = self.random.rand(2)
    corr_str = ["abs_exp", "squar_exp", "act_exp", "matern32", "matern52"]
    corr_def = [abs_exp, squar_exp, act_exp, matern32, matern52]

    self.eps = eps
    self.X = X
    self.y = y
    (
        self.X_norma,
        self.y_norma,
        self.X_offset,
        self.y_mean,
        self.X_scale,
        self.y_std,
    ) = (X_norma, y_norma, X_offset, y_mean, X_scale, y_std)
    self.D, self.ij = D, ij
    self.theta = theta
    self.corr_str = corr_str
    self.corr_def = corr_def
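# A minimal sketch of the data pipeline this fixture builds, assuming the
# SMT utility signatures used above (the import path below is an assumption;
# it has moved between SMT releases).
import numpy as np
from smt.sampling_methods import LHS
from smt.utils.kriging_utils import standardization, cross_distances

rng = np.random.RandomState(42)
X = LHS(xlimits=np.asarray([[0, 1], [0, 1]]), random_state=rng)(8)
y = rng.rand(8, 1)  # stand-in outputs

# Center and scale inputs and outputs to zero mean and unit std.
X_norma, y_norma, X_offset, y_mean, X_scale, y_std = standardization(X, y)

# D stacks the componentwise differences of every sample pair; ij stores
# the matching index pairs, so both have n * (n - 1) / 2 rows.
D, ij = cross_distances(X_norma)
print(D.shape, ij.shape)  # (28, 2) (28, 2) for n=8, nx=2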
def _new_train_iteration(self, lvl):
    n_samples = self.nt_all

    self.X_norma = self.X_norma_all[lvl]
    self.y_norma = self.y_norma_all[lvl]

    # Calculate matrix of distances D between samples
    self.D_all[lvl] = cross_distances(self.X_norma)

    # Regression matrix and parameters
    self.F_all[lvl] = self._regression_types[self.options["poly"]](self.X_norma)
    self.p_all[lvl] = self.F_all[lvl].shape[1]

    # Concatenate the autoregressive part for levels > 0
    if lvl > 0:
        F_rho = self._regression_types[self.options["rho_regr"]](self.X_norma)
        self.q_all[lvl] = F_rho.shape[1]
        self.F_all[lvl] = np.hstack(
            (
                F_rho
                * np.dot(
                    self._predict_intermediate_values(
                        self.X_norma, lvl, descale=False
                    ),
                    np.ones((1, self.q_all[lvl])),
                ),
                self.F_all[lvl],
            )
        )
    else:
        self.q_all[lvl] = 0

    n_samples_F_i = self.F_all[lvl].shape[0]
    if n_samples_F_i != n_samples[lvl]:
        raise Exception(
            "Number of rows in F and X do not match. Most "
            "likely something is going wrong with the "
            "regression model."
        )

    if int(self.p_all[lvl] + self.q_all[lvl]) >= n_samples_F_i:
        raise Exception(
            (
                "Ordinary least squares problem is underdetermined: "
                "n_samples=%d must be greater than the regression "
                "model size p+q=%d."
            )
            % (n_samples_F_i, self.p_all[lvl] + self.q_all[lvl])
        )

    # Determine Gaussian Process model parameters
    self.F = self.F_all[lvl]
    D, self.ij = self.D_all[lvl]
    self._lvl = lvl
    self.nt = self.nt_all[lvl]
    self.q = self.q_all[lvl]
    self.p = self.p_all[lvl]
    (
        self.optimal_rlf_value[lvl],
        self.optimal_par[lvl],
        self.optimal_theta[lvl],
    ) = self._optimize_hyperparam(D)
    if self.options["eval_noise"]:
        tmp_list = self.optimal_theta[lvl]
        self.optimal_theta[lvl] = tmp_list[:-1]
        self.noise[lvl] = tmp_list[-1]
    del self.y_norma, self.D
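# When eval_noise is on, _optimize_hyperparam is assumed to return the
# per-dimension length-scales with one noise variance appended at the end;
# the split above mirrors that packing. A standalone sketch:
import numpy as np

tmp_list = np.array([0.8, 1.3, 1e-4])  # hypothetical optimizer output

theta = tmp_list[:-1]  # correlation length-scales, one per input dimension
noise = tmp_list[-1]   # appended noise variance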
def setUp(self):
    eps = 1e-8
    xlimits = np.asarray([[0, 1], [0, 1]])
    self.random = np.random.RandomState(42)
    lhs = LHS(xlimits=xlimits, random_state=self.random)
    X = lhs(8)
    y = LHS(xlimits=np.asarray([[0, 1]]), random_state=self.random)(8)

    X_norma, y_norma, X_offset, y_mean, X_scale, y_std = standardization(X, y)
    D, ij = cross_distances(X_norma)
    theta = self.random.rand(2)
    corr_str = ["abs_exp", "squar_exp", "act_exp", "matern32", "matern52"]
    corr_def = [abs_exp, squar_exp, act_exp, matern32, matern52]

    self.eps = eps
    self.X = X
    self.y = y
    (
        self.X_norma,
        self.y_norma,
        self.X_offset,
        self.y_mean,
        self.X_scale,
        self.y_std,
    ) = (X_norma, y_norma, X_offset, y_mean, X_scale, y_std)
    self.D, self.ij = D, ij
    self.theta = theta
    self.corr_str = corr_str
    self.corr_def = corr_def

def test_noise_estimation(self):
    xt = np.array([[0.0], [1.0], [2.0], [3.0], [4.0]])
    yt = np.array([0.0, 1.0, 1.5, 0.9, 1.0])

    sm = KRG(hyper_opt="Cobyla", eval_noise=True, noise0=[1e-4])
    sm.set_training_values(xt, yt)
    sm.train()

    x = np.linspace(0, 4, 100)
    y = sm.predict_values(x)
    self.assert_error(np.array(sm.optimal_theta), np.array([1.6]), 1e-1, 1e-1)
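# A hedged usage sketch of the noise-estimating Kriging model this test
# exercises; predict_variances is assumed to be available alongside
# predict_values in the same SMT release.
import numpy as np
from smt.surrogate_models import KRG

xt = np.array([[0.0], [1.0], [2.0], [3.0], [4.0]])
yt = np.array([0.0, 1.0, 1.5, 0.9, 1.0])

# eval_noise=True estimates a noise variance jointly with the
# length-scales, starting from noise0.
sm = KRG(hyper_opt="Cobyla", eval_noise=True, noise0=[1e-4])
sm.set_training_values(xt, yt)
sm.train()

x = np.linspace(0, 4, 100).reshape(-1, 1)
mean = sm.predict_values(x)    # posterior mean
var = sm.predict_variances(x)  # posterior variance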
def _new_train(self):
    self._check_param()

    # Sampling points X and y
    X = self.training_points[None][0][0]
    y = self.training_points[None][0][1]

    # Compute PLS-coefficients (attr of self) and modified X and y (if GEKPLS is used)
    if self.name not in ["Kriging", "MGP"]:
        X, y = self._compute_pls(X.copy(), y.copy())

    # Center and scale X and y
    (
        self.X_norma,
        self.y_norma,
        self.X_mean,
        self.y_mean,
        self.X_std,
        self.y_std,
    ) = standardization(X, y)

    # Calculate matrix of distances D between samples
    D, self.ij = cross_distances(self.X_norma)
    if np.min(np.sum(np.abs(D), axis=1)) == 0.0:
        print(
            "Warning: multiple x input features have the same value (at least same row twice)."
        )

    ####
    # Regression matrix and parameters
    self.F = self._regression_types[self.options["poly"]](self.X_norma)
    n_samples_F = self.F.shape[0]
    if self.F.ndim > 1:
        p = self.F.shape[1]
    else:
        p = 1
    self._check_F(n_samples_F, p)

    # Optimization
    (
        self.optimal_rlf_value,
        self.optimal_par,
        self.optimal_theta,
    ) = self._optimize_hyperparam(D)
    if self.name in ["MFK", "MFKPLS", "MFKPLSK"]:
        if self.options["eval_noise"]:
            self.optimal_theta = self.optimal_theta[:-1]
    elif self.name in ["MGP"]:
        self._specific_train()
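# The regression matrix F evaluates the chosen polynomial basis at the
# training points. A sketch of what the "constant" and "linear" options
# are assumed to produce (matching SMT's regression models):
import numpy as np

X_norma = np.array([[0.1, 0.2], [0.4, 0.8], [0.9, 0.5]])

F_constant = np.ones((X_norma.shape[0], 1))                      # p = 1
F_linear = np.hstack([np.ones((X_norma.shape[0], 1)), X_norma])  # p = 1 + nx
print(F_constant.shape, F_linear.shape)  # (3, 1) (3, 3)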
def _new_train(self):
    """
    Overrides KrgBased implementation
    Trains the Multi-Fidelity model
    """
    xt = []
    yt = []
    i = 0
    while self.training_points.get(i, None) is not None:
        xt.append(self.training_points[i][0][0])
        yt.append(self.training_points[i][0][1])
        i = i + 1
    xt.append(self.training_points[None][0][0])
    yt.append(self.training_points[None][0][1])

    self._check_list_structure(xt, yt)
    self._check_param()
    X = self.X
    y = self.y

    _, _, self.X_offset, self.y_mean, self.X_scale, self.y_std = standardization(
        np.concatenate(xt, axis=0), np.concatenate(yt, axis=0)
    )

    nlevel = self.nlvl
    n_samples = self.nt_all

    # initialize lists
    self.noise_all = nlevel * [0]
    self.D_all = nlevel * [0]
    self.F_all = nlevel * [0]
    self.p_all = nlevel * [0]
    self.q_all = nlevel * [0]
    self.optimal_rlf_value = nlevel * [0]
    self.optimal_par = nlevel * [{}]
    self.optimal_theta = nlevel * [0]

    self.X_norma_all = [(x - self.X_offset) / self.X_scale for x in X]
    self.y_norma_all = [(f - self.y_mean) / self.y_std for f in y]

    noise0 = self.options["noise0"].copy()
    theta0 = self.options["theta0"].copy()
    for lvl in range(nlevel):
        self.options["noise0"] = [noise0[lvl]]
        self.options["theta0"] = theta0[lvl, :]
        self.X_norma = self.X_norma_all[lvl]
        self.y_norma = self.y_norma_all[lvl]

        if self.options["eval_noise"] and self.options["use_het_noise"]:
            # hetGP works with unique design variables
            (
                self.X_norma,
                self.index_unique,  # do we need to store it?
                self.nt_reps,  # do we need to store it?
            ) = np.unique(
                self.X_norma, return_inverse=True, return_counts=True, axis=0
            )
            self.nt_all[lvl] = self.X_norma.shape[0]

            # computing the mean of the output per unique design variable (see Binois et al., 2018)
            y_norma_unique = []
            for i in range(self.nt_all[lvl]):
                y_norma_unique.append(np.mean(self.y_norma[self.index_unique == i]))

            # pointwise sensible estimates of the noise variances (see Ankenman et al., 2010)
            self.noise = self.options["noise0"] * np.ones(self.nt_all[lvl])
            for i in range(self.nt_all[lvl]):
                diff = self.y_norma[self.index_unique == i] - y_norma_unique[i]
                if np.sum(diff**2) != 0.0:
                    self.noise[i] = np.std(diff, ddof=1) ** 2
            self.noise = self.noise / self.nt_reps
            self.y_norma = y_norma_unique
            self.X_norma_all[lvl] = self.X_norma
            self.y_norma_all[lvl] = self.y_norma

        # Calculate matrix of distances D between samples
        self.D_all[lvl] = cross_distances(self.X_norma)

        # Regression matrix and parameters
        self.F_all[lvl] = self._regression_types[self.options["poly"]](self.X_norma)
        self.p_all[lvl] = self.F_all[lvl].shape[1]

        # Concatenate the autoregressive part for levels > 0
        if lvl > 0:
            F_rho = self._regression_types[self.options["rho_regr"]](self.X_norma)
            self.q_all[lvl] = F_rho.shape[1]
            self.F_all[lvl] = np.hstack(
                (
                    F_rho
                    * np.dot(
                        self._predict_intermediate_values(
                            self.X_norma, lvl, descale=False
                        ),
                        np.ones((1, self.q_all[lvl])),
                    ),
                    self.F_all[lvl],
                )
            )
        else:
            self.q_all[lvl] = 0

        n_samples_F_i = self.F_all[lvl].shape[0]
        if n_samples_F_i != n_samples[lvl]:
            raise Exception(
                "Number of rows in F and X do not match. Most "
                "likely something is going wrong with the "
                "regression model."
            )

        if int(self.p_all[lvl] + self.q_all[lvl]) >= n_samples_F_i:
            raise Exception(
                (
                    "Ordinary least squares problem is underdetermined: "
                    "n_samples=%d must be greater than the regression "
                    "model size p+q=%d."
                )
                % (n_samples_F_i, self.p_all[lvl] + self.q_all[lvl])
            )

        # Determine Gaussian Process model parameters
        self.F = self.F_all[lvl]
        D, self.ij = self.D_all[lvl]
        self._lvl = lvl
        self.nt = self.nt_all[lvl]
        self.q = self.q_all[lvl]
        self.p = self.p_all[lvl]
        (
            self.optimal_rlf_value[lvl],
            self.optimal_par[lvl],
            self.optimal_theta[lvl],
        ) = self._optimize_hyperparam(D)
        if self.options["eval_noise"]:
            tmp_list = self.optimal_theta[lvl]
            self.optimal_theta[lvl] = tmp_list[0 : D.shape[1]]
            self.noise_all[lvl] = tmp_list[D.shape[1] :]
        del self.y_norma, self.D

    self.options["noise0"] = noise0
    self.options["theta0"] = theta0

    if self.options["eval_noise"] and self.options["optim_var"]:
        for lvl in range(self.nlvl - 1):
            self.set_training_values(
                X[lvl],
                self._predict_intermediate_values(X[lvl], lvl + 1),
                name=lvl,
            )
        self.set_training_values(
            X[-1], self._predict_intermediate_values(X[-1], self.nlvl)
        )
        self.options["eval_noise"] = False
        self._new_train()
def _new_train_iteration(self, lvl):
    n_samples = self.nt_all

    self.options["noise0"] = np.array([self.options["noise0"][lvl]]).flatten()
    self.options["theta0"] = self.options["theta0"][lvl, :]
    self.X_norma = self.X_norma_all[lvl]
    self.y_norma = self.y_norma_all[lvl]

    if self.options["eval_noise"]:
        if self.options["use_het_noise"]:
            # hetGP works with unique design variables
            (
                self.X_norma,
                self.index_unique,  # do we need to store it?
                self.nt_reps,  # do we need to store it?
            ) = np.unique(
                self.X_norma, return_inverse=True, return_counts=True, axis=0
            )
            self.nt_all[lvl] = self.X_norma.shape[0]

            # computing the mean of the output per unique design variable (see Binois et al., 2018)
            y_norma_unique = []
            for i in range(self.nt_all[lvl]):
                y_norma_unique.append(np.mean(self.y_norma[self.index_unique == i]))
            y_norma_unique = np.array(y_norma_unique).reshape(-1, 1)

            # pointwise sensible estimates of the noise variances (see Ankenman et al., 2010)
            self.optimal_noise = self.options["noise0"] * np.ones(self.nt_all[lvl])
            for i in range(self.nt_all[lvl]):
                diff = self.y_norma[self.index_unique == i] - y_norma_unique[i]
                if np.sum(diff**2) != 0.0:
                    self.optimal_noise[i] = np.std(diff, ddof=1) ** 2
            self.optimal_noise = self.optimal_noise / self.nt_reps
            self.optimal_noise_all[lvl] = self.optimal_noise
            self.y_norma = y_norma_unique
            self.X_norma_all[lvl] = self.X_norma
            self.y_norma_all[lvl] = self.y_norma
    else:
        self.optimal_noise = self.options["noise0"] / self.y_std**2
        self.optimal_noise_all[lvl] = self.optimal_noise

    # Calculate matrix of distances D between samples
    self.D_all[lvl] = cross_distances(self.X_norma)

    # Regression matrix and parameters
    self.F_all[lvl] = self._regression_types[self.options["poly"]](self.X_norma)
    self.p_all[lvl] = self.F_all[lvl].shape[1]

    # Concatenate the autoregressive part for levels > 0
    if lvl > 0:
        F_rho = self._regression_types[self.options["rho_regr"]](self.X_norma)
        self.q_all[lvl] = F_rho.shape[1]
        self.F_all[lvl] = np.hstack(
            (
                F_rho
                * np.dot(
                    self._predict_intermediate_values(
                        self.X_norma, lvl, descale=False
                    ),
                    np.ones((1, self.q_all[lvl])),
                ),
                self.F_all[lvl],
            )
        )
    else:
        self.q_all[lvl] = 0

    n_samples_F_i = self.F_all[lvl].shape[0]
    if n_samples_F_i != n_samples[lvl]:
        raise Exception(
            "Number of rows in F and X do not match. Most "
            "likely something is going wrong with the "
            "regression model."
        )

    if int(self.p_all[lvl] + self.q_all[lvl]) >= n_samples_F_i:
        raise Exception(
            (
                "Ordinary least squares problem is underdetermined: "
                "n_samples=%d must be greater than the regression "
                "model size p+q=%d."
            )
            % (n_samples_F_i, self.p_all[lvl] + self.q_all[lvl])
        )

    # Determine Gaussian Process model parameters
    self.F = self.F_all[lvl]
    D, self.ij = self.D_all[lvl]
    self._lvl = lvl
    self.nt = self.nt_all[lvl]
    self.q = self.q_all[lvl]
    self.p = self.p_all[lvl]
    (
        self.optimal_rlf_value[lvl],
        self.optimal_par[lvl],
        self.optimal_theta[lvl],
    ) = self._optimize_hyperparam(D)
    if self.options["eval_noise"] and not self.options["use_het_noise"]:
        tmp_list = self.optimal_theta[lvl]
        self.optimal_theta[lvl] = tmp_list[:-1]
        self.optimal_noise = tmp_list[-1]
        self.optimal_noise_all[lvl] = self.optimal_noise
    del self.y_norma, self.D, self.optimal_noise
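# The replicate handling above follows the stochastic-kriging recipe:
# collapse repeated inputs to unique rows, average their outputs, and take
# the per-point sample variance divided by the replicate count as the
# noise estimate. A standalone sketch of those three steps:
import numpy as np

X = np.array([[0.0], [0.5], [0.5], [1.0]])  # rows 1 and 2 are replicates
y = np.array([1.0, 2.0, 2.4, 3.0])

X_unique, index_unique, nt_reps = np.unique(
    X, return_inverse=True, return_counts=True, axis=0
)

# Mean output per unique design point (Binois et al., 2018).
y_unique = np.array([y[index_unique == i].mean() for i in range(len(X_unique))])

# Pointwise noise estimates (Ankenman et al., 2010); groups with a single
# observation keep the noise0 floor.
noise0 = 1e-6
noise = noise0 * np.ones(len(X_unique))
for i in range(len(X_unique)):
    diff = y[index_unique == i] - y_unique[i]
    if np.sum(diff**2) != 0.0:
        noise[i] = np.std(diff, ddof=1) ** 2
noise = noise / nt_reps
print(X_unique.ravel(), y_unique, noise)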
def _new_train(self):
    # Sampling points X and y
    X = self.training_points[None][0][0]
    y = self.training_points[None][0][1]

    # Compute PLS-coefficients (attr of self) and modified X and y (if GEKPLS is used)
    if self.name not in ["Kriging", "MGP"]:
        X, y = self._compute_pls(X.copy(), y.copy())

    self._check_param()

    if self.options["corr"] == "gower":
        self.X_train = X
        Xt = X
        _, x_n_cols = Xt.shape
        cat_features = np.zeros(x_n_cols, dtype=bool)
        for col in range(x_n_cols):
            if not np.issubdtype(type(Xt[0, col]), float):
                cat_features[col] = True

        X_cont = Xt[:, np.logical_not(cat_features)].astype(float)
        (
            self.X_norma,
            self.y_norma,
            self.X_offset,
            self.y_mean,
            self.X_scale,
            self.y_std,
        ) = standardization(X_cont, y)
        D, self.ij = gower_distances(X)
    else:
        # Center and scale X and y
        (
            self.X_norma,
            self.y_norma,
            self.X_offset,
            self.y_mean,
            self.X_scale,
            self.y_std,
        ) = standardization(X, y)

    if not self.options["eval_noise"]:
        self.optimal_noise = np.array(self.options["noise0"])
    elif self.options["use_het_noise"]:
        # hetGP works with unique design variables when noise variances are not given
        (
            self.X_norma,
            index_unique,
            nt_reps,
        ) = np.unique(self.X_norma, return_inverse=True, return_counts=True, axis=0)
        self.nt = self.X_norma.shape[0]

        # computing the mean of the output per unique design variable (see Binois et al., 2018)
        y_norma_unique = []
        for i in range(self.nt):
            y_norma_unique.append(np.mean(self.y_norma[index_unique == i]))

        # pointwise sensible estimates of the noise variances (see Ankenman et al., 2010)
        self.optimal_noise = self.options["noise0"] * np.ones(self.nt)
        for i in range(self.nt):
            diff = self.y_norma[index_unique == i] - y_norma_unique[i]
            if np.sum(diff**2) != 0.0:
                self.optimal_noise[i] = np.std(diff, ddof=1) ** 2
        self.optimal_noise = self.optimal_noise / nt_reps
        self.y_norma = y_norma_unique

    if self.options["corr"] != "gower":
        # Calculate matrix of distances D between samples
        D, self.ij = cross_distances(self.X_norma)
        if np.min(np.sum(np.abs(D), axis=1)) == 0.0:
            print(
                "Warning: multiple x input features have the same value (at least same row twice)."
            )

    ####
    # Regression matrix and parameters
    self.F = self._regression_types[self.options["poly"]](self.X_norma)
    n_samples_F = self.F.shape[0]
    if self.F.ndim > 1:
        p = self.F.shape[1]
    else:
        p = 1
    self._check_F(n_samples_F, p)

    # Optimization
    (
        self.optimal_rlf_value,
        self.optimal_par,
        self.optimal_theta,
    ) = self._optimize_hyperparam(D)
    if self.name in ["MGP"]:
        self._specific_train()
    else:
        if self.options["eval_noise"] and not self.options["use_het_noise"]:
            self.optimal_noise = self.optimal_theta[-1]
            self.optimal_theta = self.optimal_theta[:-1]
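# A sketch of the categorical-column detection used by the "gower" branch:
# columns whose entries are not floats are flagged, and only the remaining
# continuous columns are standardized, while the full mixed array feeds
# the Gower distance.
import numpy as np

# object dtype keeps strings and floats intact side by side
Xt = np.array([[0.2, "red"], [0.7, "blue"], [0.5, "red"]], dtype=object)

_, x_n_cols = Xt.shape
cat_features = np.zeros(x_n_cols, dtype=bool)
for col in range(x_n_cols):
    if not np.issubdtype(type(Xt[0, col]), float):
        cat_features[col] = True

X_cont = Xt[:, ~cat_features].astype(float)
print(cat_features, X_cont.ravel())  # [False  True] [0.2 0.7 0.5]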
def _new_train(self):
    self._check_param()

    # Sampling points X and y
    X = self.training_points[None][0][0]
    y = self.training_points[None][0][1]

    # Compute PLS-coefficients (attr of self) and modified X and y (if GEKPLS is used)
    if self.name not in ["Kriging", "MGP"]:
        X, y = self._compute_pls(X.copy(), y.copy())

    # Center and scale X and y
    (
        self.X_norma,
        self.y_norma,
        self.X_offset,
        self.y_mean,
        self.X_scale,
        self.y_std,
    ) = standardization(X, y)

    if self.options["eval_noise"] and self.options["use_het_noise"]:
        # hetGP works with unique design variables
        (
            self.X_norma,
            self.index_unique,  # do we need to store it?
            self.nt_reps,  # do we need to store it?
        ) = np.unique(self.X_norma, return_inverse=True, return_counts=True, axis=0)
        self.nt = self.X_norma.shape[0]

        # computing the mean of the output per unique design variable (see Binois et al., 2018)
        y_norma_unique = []
        for i in range(self.nt):
            y_norma_unique.append(np.mean(self.y_norma[self.index_unique == i]))

        # pointwise sensible estimates of the noise variances (see Ankenman et al., 2010)
        self.noise = self.options["noise0"] * np.ones(self.nt)
        for i in range(self.nt):
            diff = self.y_norma[self.index_unique == i] - y_norma_unique[i]
            if np.sum(diff**2) != 0.0:
                self.noise[i] = np.std(diff, ddof=1) ** 2
        self.noise = self.noise / self.nt_reps
        self.y_norma = y_norma_unique

    # Calculate matrix of distances D between samples
    D, self.ij = cross_distances(self.X_norma)
    if np.min(np.sum(np.abs(D), axis=1)) == 0.0:
        print(
            "Warning: multiple x input features have the same value (at least same row twice)."
        )

    ####
    # Regression matrix and parameters
    self.F = self._regression_types[self.options["poly"]](self.X_norma)
    n_samples_F = self.F.shape[0]
    if self.F.ndim > 1:
        p = self.F.shape[1]
    else:
        p = 1
    self._check_F(n_samples_F, p)

    # Optimization
    (
        self.optimal_rlf_value,
        self.optimal_par,
        self.optimal_theta,
    ) = self._optimize_hyperparam(D)
    if self.name in ["MGP"]:
        self._specific_train()
    else:
        if self.options["eval_noise"]:
            if not self.options["use_het_noise"]:
                self.noise = self.optimal_theta[self.D.shape[1] :]
                self.optimal_theta = self.optimal_theta[0 : self.D.shape[1]]
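# The duplicate-row warning works because each row of D is the
# componentwise difference of one sample pair: two identical samples make
# that difference vector, and hence its absolute row sum, exactly zero.
# A small sketch of the check:
import numpy as np
from itertools import combinations

X = np.array([[0.1, 0.2], [0.5, 0.5], [0.1, 0.2]])  # rows 0 and 2 coincide

# componentwise differences for all sample pairs, as cross_distances builds
D = np.array([X[i] - X[j] for i, j in combinations(range(len(X)), 2)])

if np.min(np.sum(np.abs(D), axis=1)) == 0.0:
    print("duplicate design points detected")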
def _new_train(self):
    """
    Overrides KrgBased implementation
    Trains the Multi-Fidelity model
    """
    xt = []
    yt = []
    i = 0
    while self.training_points.get(i, None) is not None:
        xt.append(self.training_points[i][0][0])
        yt.append(self.training_points[i][0][1])
        i = i + 1
    xt.append(self.training_points[None][0][0])
    yt.append(self.training_points[None][0][1])

    self._check_list_structure(xt, yt)
    self._check_param()
    X = self.X
    y = self.y

    _, _, self.X_mean, self.y_mean, self.X_std, self.y_std = standardization(
        np.concatenate(xt, axis=0), np.concatenate(yt, axis=0)
    )

    nlevel = self.nlvl
    n_samples = self.nt_all

    # initialize lists
    self.noise = nlevel * [0]
    self.D_all = nlevel * [0]
    self.F_all = nlevel * [0]
    self.p_all = nlevel * [0]
    self.q_all = nlevel * [0]
    self.optimal_rlf_value = nlevel * [0]
    self.optimal_par = nlevel * [{}]
    self.optimal_theta = nlevel * [0]

    self.X_norma_all = [(x - self.X_mean) / self.X_std for x in X]
    self.y_norma_all = [(f - self.y_mean) / self.y_std for f in y]

    for lvl in range(nlevel):
        self.X_norma = self.X_norma_all[lvl]
        self.y_norma = self.y_norma_all[lvl]

        # Calculate matrix of distances D between samples
        self.D_all[lvl] = cross_distances(self.X_norma)

        # Regression matrix and parameters
        self.F_all[lvl] = self._regression_types[self.options["poly"]](self.X_norma)
        self.p_all[lvl] = self.F_all[lvl].shape[1]

        # Concatenate the autoregressive part for levels > 0
        if lvl > 0:
            F_rho = self._regression_types[self.options["rho_regr"]](self.X_norma)
            self.q_all[lvl] = F_rho.shape[1]
            self.F_all[lvl] = np.hstack(
                (
                    F_rho
                    * np.dot(
                        self._predict_intermediate_values(
                            self.X_norma, lvl, descale=False
                        ),
                        np.ones((1, self.q_all[lvl])),
                    ),
                    self.F_all[lvl],
                )
            )
        else:
            self.q_all[lvl] = 0

        n_samples_F_i = self.F_all[lvl].shape[0]
        if n_samples_F_i != n_samples[lvl]:
            raise Exception(
                "Number of rows in F and X do not match. Most "
                "likely something is going wrong with the "
                "regression model."
            )

        if int(self.p_all[lvl] + self.q_all[lvl]) >= n_samples_F_i:
            raise Exception(
                (
                    "Ordinary least squares problem is underdetermined: "
                    "n_samples=%d must be greater than the regression "
                    "model size p+q=%d."
                )
                % (n_samples_F_i, self.p_all[lvl] + self.q_all[lvl])
            )

        # Determine Gaussian Process model parameters
        self.F = self.F_all[lvl]
        D, self.ij = self.D_all[lvl]
        self._lvl = lvl
        self.nt = self.nt_all[lvl]
        self.q = self.q_all[lvl]
        self.p = self.p_all[lvl]
        (
            self.optimal_rlf_value[lvl],
            self.optimal_par[lvl],
            self.optimal_theta[lvl],
        ) = self._optimize_hyperparam(D)
        if self.options["eval_noise"]:
            tmp_list = self.optimal_theta[lvl]
            self.optimal_theta[lvl] = tmp_list[:-1]
            self.noise[lvl] = tmp_list[-1]
        del self.y_norma, self.D

    if self.options["eval_noise"] and self.options["optim_var"]:
        for lvl in range(self.nlvl - 1):
            self.set_training_values(
                X[lvl],
                self._predict_intermediate_values(X[lvl], lvl + 1),
                name=lvl,
            )
        self.set_training_values(
            X[-1], self._predict_intermediate_values(X[-1], self.nlvl)
        )
        self.options["eval_noise"] = False
        self._new_train()
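# The optim_var branch reinterpolates: each level's training data is
# replaced by the model's own predictions and the model is retrained with
# eval_noise disabled, so the final surrogate interpolates the smoothed
# values. A single-fidelity KRG stand-in for that idea (illustrative only,
# not the MFK code path):
import numpy as np
from smt.surrogate_models import KRG

xt = np.array([[0.0], [1.0], [2.0], [3.0], [4.0]])
yt = np.array([0.0, 1.0, 1.5, 0.9, 1.0])

# First pass: estimate the noise variance along with the hyperparameters.
noisy = KRG(eval_noise=True, noise0=[1e-4])
noisy.set_training_values(xt, yt)
noisy.train()

# Reinterpolation pass: retrain without noise on the smoothed predictions.
smooth = KRG(eval_noise=False)
smooth.set_training_values(xt, noisy.predict_values(xt))
smooth.train()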