def nmll_opt_gp(data, model_params, settings):
    train_x = torch.from_numpy(data['X']).float().to(settings.device)
    train_y = torch.from_numpy(data['f']).float().unsqueeze(-1).to(settings.device)
    test_x = torch.from_numpy(data['X_2']).float().to(settings.device)
    test_y = torch.from_numpy(data['f_2']).float().unsqueeze(-1).to(settings.device)

    # optimize the spectral-mixture hyperparameters by minimizing the NMLL
    sm_params, time_elapsed = nmll_opt(data, model_params, settings)

    # build train/test kernel blocks with the optimized hyperparameters
    if not model_params.is_no_mu:
        K11 = cal_kern_spec_mix_sep(train_x, train_x, sm_params.mu, sm_params.var, sm_params.weights)
        K12 = cal_kern_spec_mix_sep(train_x, test_x, sm_params.mu, sm_params.var, sm_params.weights)
        K22 = cal_kern_spec_mix_sep(test_x, test_x, sm_params.mu, sm_params.var, sm_params.weights)
    else:
        K11 = cal_kern_spec_mix_nomu_sep(train_x, train_x, sm_params.var, sm_params.weights)
        K12 = cal_kern_spec_mix_nomu_sep(train_x, test_x, sm_params.var, sm_params.weights)
        K22 = cal_kern_spec_mix_nomu_sep(test_x, test_x, sm_params.var, sm_params.weights)

    # GP posterior prediction on the test inputs
    mu_test, var_test = GP_noise(train_y, K11, K12, K22, settings.epsilon, settings.device)
    mu_test = mu_test.detach().squeeze(-1).cpu().numpy()
    var_test = var_test.detach().squeeze(-1).cpu().numpy().diagonal()
    return mu_test, var_test, sm_params, time_elapsed
def predict(x_t, y_t, x_v, model_config_filename, use_gpu=False):
    model_conf = edict(yaml.load(open(model_config_filename, 'r'), Loader=yaml.FullLoader))
    device = torch.device('cuda' if use_gpu else 'cpu')
    pretrained_model_filename = model_conf.pretrained_model
    data_dim = x_t.shape[1]
    num_data = x_t.shape[0]
    # noise variance of GP
    epsilon = model_conf.epsilon

    data = {}
    data['X'] = x_t
    data['f'] = y_t
    data['X_2'] = x_v
    train_x = torch.from_numpy(data['X']).float().to(device)
    train_y = torch.from_numpy(data['f']).float().unsqueeze(-1).to(device)
    test_x = torch.from_numpy(data['X_2']).float().to(device)
    data['X_data'] = torch.from_numpy(data['X']).float().unsqueeze(0).to(device)  # 1 X N X D
    data['F'] = torch.from_numpy(data['f']).float().unsqueeze(0).to(device)  # 1 X N
    data['node_mask'] = torch.ones(num_data).unsqueeze(0).to(device)  # 1 X N
    data['diagonal_mask'] = torch.zeros(num_data).unsqueeze(0).to(device)  # 1 X N
    data['dim_mask'] = torch.ones(data_dim).unsqueeze(0).to(device)  # 1 X D
    data['kernel_mask'] = torch.ones(num_data, num_data).unsqueeze(0).to(device)  # 1 X N X N
    data['N'] = torch.ones(1).to(device) * num_data  # 1

    # create model and load pretrained weights
    model = eval(model_conf.name)(model_conf)
    model_snapshot = torch.load(pretrained_model_filename, map_location=device)
    model.load_state_dict(model_snapshot["model"], strict=True)
    model.to(device)
    model.eval()

    with torch.no_grad():
        # the amortized model predicts spectral-mixture hyperparameters from the data
        if model_conf.name == 'GpSMDoubleAtt':
            mu, var, weights, nmll = model(data['X_data'], data['X_data'], data['F'], data['F'],
                                           data['node_mask'], data['dim_mask'], data['kernel_mask'],
                                           data['diagonal_mask'], data['N'], device=device)
        elif model_conf.name == 'GpSMDoubleAttNoMu':
            var, weights, nmll = model(data['X_data'], data['X_data'], data['F'], data['F'],
                                       data['node_mask'], data['dim_mask'], data['kernel_mask'],
                                       data['diagonal_mask'], data['N'], device=device)
        else:
            raise ValueError("No model of given name!")

        var = var.squeeze(0)
        weights = weights.squeeze(0)
        if model_conf.is_no_mu:
            K11 = cal_kern_spec_mix_nomu_sep(train_x, train_x, var, weights)
            K12 = cal_kern_spec_mix_nomu_sep(train_x, test_x, var, weights)
            K22 = cal_kern_spec_mix_nomu_sep(test_x, test_x, var, weights)
        else:
            mu = mu.squeeze(0)
            K11 = cal_kern_spec_mix_sep(train_x, train_x, mu, var, weights)
            K12 = cal_kern_spec_mix_sep(train_x, test_x, mu, var, weights)
            K22 = cal_kern_spec_mix_sep(test_x, test_x, mu, var, weights)

        nmll = -cal_marg_likelihood_single(K11, train_y, epsilon, device)
        # GP posterior prediction on the test inputs
        mu_test, var_test = GP_noise(train_y, K11, K12, K22, epsilon, device)
        mu_test = mu_test.detach().squeeze(-1).cpu().numpy()
        var_test = var_test.detach().squeeze(-1).cpu().numpy().diagonal()
    return mu_test, var_test
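# --- Usage sketch (illustrative, not part of the original code) ---
# Shows how the predict() helper above might be called. The data and the config
# path 'config/model.yaml' are made up; the YAML is expected to provide at least
# `name`, `pretrained_model`, `epsilon` and `is_no_mu`, which predict() reads.
def _example_predict_usage():
    import numpy as np
    x_t = np.random.randn(100, 3)        # training inputs, N x D
    y_t = np.sin(x_t).sum(axis=1)        # training targets, N
    x_v = np.random.randn(20, 3)         # test inputs, M x D
    # hypothetical config path; replace with a real model config
    mu_test, var_test = predict(x_t, y_t, x_v, 'config/model.yaml')
    return mu_test, var_test             # 1-d arrays of length M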
def predict(self, X: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Predict mean and variance values for given points

    :param X: array of shape (n_points x n_inputs) of points to run prediction for
    :return: Tuple of mean and variance which are 2d arrays of shape (n_points x n_outputs)
    """
    self.X_test_torch = torch.from_numpy(X).float().to(self.device)
    K11 = cal_kern_spec_mix_nomu_sep(self.X_train_torch, self.X_train_torch,
                                     self.sm_params.var, self.sm_params.weights)
    K12 = cal_kern_spec_mix_nomu_sep(self.X_train_torch, self.X_test_torch,
                                     self.sm_params.var, self.sm_params.weights)
    K22 = cal_kern_spec_mix_nomu_sep(self.X_test_torch, self.X_test_torch,
                                     self.sm_params.var, self.sm_params.weights)
    mu, var = GP_noise(self.Y_train_torch, K11, K12, K22, self.noise_level, self.device)
    mu = mu.cpu().numpy()
    var = np.diag(var.cpu().numpy())[:, None]
    return mu, var
def set_data(self, X: np.ndarray, Y: np.ndarray) -> None:
    """
    Sets training data in model

    :param X: new points
    :param Y: function values at new points X
    """
    self.X_train = X
    self.Y_train = Y
    self.X_train_torch = torch.from_numpy(X).float().to(self.device)
    self.Y_train_torch = torch.from_numpy(Y).float().to(self.device)
    K11 = cal_kern_spec_mix_nomu_sep(self.X_train_torch, self.X_train_torch,
                                     self.sm_params.var, self.sm_params.weights)
    self.kernel = K11.detach().cpu().numpy()
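# --- Usage sketch (illustrative, not part of the original code) ---
# set_data() and predict() above are methods of a GP wrapper class whose
# constructor is not shown in this excerpt. The helper below assumes `model` is
# such an instance, with `device`, `sm_params` and `noise_level` already set up.
def _example_wrapper_usage(model, X_train, Y_train, X_test):
    model.set_data(X_train, Y_train)   # X_train: (N, D), Y_train: (N, 1)
    mu, var = model.predict(X_test)    # each returned array has shape (n_points, 1)
    return mu, var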
def graminv_residual(self) -> np.ndarray:
    r"""
    The inverse Gram matrix multiplied with the mean-corrected data

    .. math::
        (K_{XX} + \sigma^2 I)^{-1} (Y - m(X))

    where the data is given by {X, Y}, m is the prior mean and \sigma^2 the observation noise

    :return: the inverse Gram matrix multiplied with the mean-corrected data,
             shape (number of datapoints, 1)
    """
    K11 = cal_kern_spec_mix_nomu_sep(self.model.X_train_torch, self.model.X_train_torch,
                                     self.model.sm_params.var, self.model.sm_params.weights)
    G = (K11 + torch.eye(K11.shape[0]) * self.model.noise_level).detach().cpu().numpy()
    result = np.linalg.solve(G, self.Y)
    return result
def solve_linear(self, z: np.ndarray) -> np.ndarray:
    r"""
    Solve the linear system G(X, X)x = z for x.

    G(X, X) is the Gram matrix :math:`G(X, X) = K(X, X) + \sigma^2 I`, of shape
    (num_dat, num_dat), and z is a matrix of shape (num_dat, num_obs).

    :param z: a matrix of shape (num_dat, num_obs)
    :return: the solution to the linear system G(X, X)x = z, shape (num_dat, num_obs)
    """
    K11 = cal_kern_spec_mix_nomu_sep(self.model.X_train_torch, self.model.X_train_torch,
                                     self.model.sm_params.var, self.model.sm_params.weights)
    G = (K11 + torch.eye(K11.shape[0]) * self.model.noise_level).detach().cpu().numpy()
    result = np.linalg.solve(G, z)
    return result
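# --- Design note (illustrative alternative, not from the original code) ---
# G(X, X) is symmetric positive definite, so a Cholesky factorization is a common
# alternative to the general-purpose np.linalg.solve used above; a minimal sketch:
def _solve_linear_cholesky(G: np.ndarray, z: np.ndarray) -> np.ndarray:
    from scipy.linalg import cho_factor, cho_solve
    # factorize once, then reuse the factorization for any right-hand side z
    c, low = cho_factor(G)
    return cho_solve((c, low), z)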
def forward(self, X, y, epsilon, device):
    # constrain the raw parameters before mapping them to valid kernel hyperparameters
    var = torch.clamp(self.var, min=-20.0, max=20.0)
    var = torch.exp(var)
    # var = torch.log(1 + var)
    weights = torch.clamp(self.weights, min=-20.0, max=20.0)
    weights = self.softmax(weights) * 2

    if self.is_no_mu:
        kern_sm = cal_kern_spec_mix_nomu_sep(X, X, var, weights)
    else:
        mu = torch.clamp(self.mu, min=-20.0, max=20.0)
        mu = torch.exp(mu)
        kern_sm = cal_kern_spec_mix_sep(X, X, mu, var, weights)

    # negative marginal log-likelihood of the data under the spectral-mixture kernel
    nmll = -cal_marg_likelihood_single(kern_sm, y, epsilon, device)
    return nmll
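# --- Usage sketch (illustrative, not part of the original code) ---
# forward() above returns the negative marginal log-likelihood, so the raw
# parameters (self.var, self.weights and, if used, self.mu) can be fitted by
# gradient descent. `nmll_model` below stands for an already-constructed instance
# of the module; the optimizer, learning rate and step count are assumptions.
def _example_nmll_optimization(nmll_model, X, y, epsilon, device, num_steps=500):
    optimizer = torch.optim.Adam(nmll_model.parameters(), lr=1e-2)
    for _ in range(num_steps):
        optimizer.zero_grad()
        loss = nmll_model(X, y, epsilon, device)  # negative marginal log-likelihood
        loss.backward()
        optimizer.step()
    return nmll_model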
def forward(self, X_data_tr, X_data_val, F_data_tr, F_data_val, node_mask_tr, dim_mask,
            kernel_mask_val, diagonal_mask_val, N_data_val, device=torch.device('cpu'),
            eval_mode=False, X_data_test=None, F_data_test=None, kernel_mask_test=None,
            diagonal_mask_test=None, N_data_test=None):
    """
    X_data: B X N X D
    F_data: B X N
    initial node_mask: B X N
    X_input: (B X D) X N X 2
    dim_mask: B X D
    node_mask: B X N
    """
    # preprocess data into per-dimension sequences
    batch_size = X_data_tr.shape[0]
    max_dim = X_data_tr.shape[2]
    max_num_data = X_data_tr.shape[1]
    f_tr = F_data_tr.unsqueeze(-1)  # B X N X 1
    f_val = F_data_val.unsqueeze(-1)  # B X N X 1
    f_expand = f_tr.expand(-1, -1, max_dim)  # B X N X D
    f_expand = f_expand * dim_mask.unsqueeze(-2)
    X_input = torch.cat((X_data_tr.unsqueeze(-1), f_expand.unsqueeze(-1)), -1)  # B X N X D X 2
    X_input = X_input.permute(0, 2, 1, 3)
    X_input = X_input.reshape(-1, max_num_data, 2)  # (B X D) X N X 2
    node_mask_tr = node_mask_tr.repeat(1, max_dim)  # B X (D X N)
    node_mask_tr = node_mask_tr.reshape(-1, max_num_data)  # (B X D) X N

    encoder_input = self.input_projection(X_input)

    # propagation: self-attention over data points within each dimension
    for attention in self.self_attentions1:
        encoder_input, attns = attention(encoder_input, encoder_input, encoder_input, mask=node_mask_tr)

    N = torch.sum(node_mask_tr, dim=1)  # (B X D): number of valid points per dimension
    encoder_input = encoder_input * node_mask_tr.unsqueeze(-1)
    dim_encoder_input = torch.sum(encoder_input, 1) / (N.unsqueeze(-1))  # (B X D) X hidden_dim
    dim_encoder_input = dim_encoder_input.reshape(batch_size, max_dim, dim_encoder_input.shape[-1])  # B X D X hidden_dim

    # feed the per-dimension summaries to the second attention network over dimensions
    for attention in self.self_attentions2:
        dim_encoder_input, attns = attention(dim_encoder_input, dim_encoder_input, dim_encoder_input, mask=dim_mask)

    dim_encoder_mask = dim_mask.unsqueeze(-1)  # B X D X 1
    dim_encoder_input = dim_encoder_input * dim_encoder_mask  # B X D X hidden_dim

    # separate MLP heads for the variances and the mixture weights
    state_var = dim_encoder_input.clone()
    state_weights = dim_encoder_input.clone()
    for tt in range(self.num_layer_var):
        state_var = F.relu(self.filter_var[tt](state_var))
        state_var = F.dropout(state_var, self.dropout, training=self.training)
    for tt in range(self.num_layer_weights):
        state_weights = F.relu(self.filter_weights[tt](state_weights))
        state_weights = F.dropout(state_weights, self.dropout, training=self.training)

    var = self.filter_var[-1](state_var)
    var = torch.clamp(var, min=-10, max=10)
    var = torch.exp(var)
    var = var * dim_mask.unsqueeze(-1)
    weights = self.filter_weights[-1](state_weights)
    weights = torch.clamp(weights, min=-10.0, max=10.0)
    weights = self.softmax(weights)
    var = var.permute(0, 2, 1)  # B X M X D
    weights = weights.permute(0, 2, 1)  # B X M X D

    # evaluate the predicted hyperparameters on the validation split
    K_val = cal_kern_spec_mix_nomu_sep(X_data_val, X_data_val, var, weights)
    nmll = -cal_marg_likelihood(K_val, f_val, self.epsilon, kernel_mask_val, diagonal_mask_val,
                                N_data_val.float(), device)

    if eval_mode:
        f_test = F_data_test.unsqueeze(-1)  # B X N X 1
        K_test = cal_kern_spec_mix_nomu_sep(X_data_test, X_data_test, var, weights)
        nmll_test = -cal_marg_likelihood(K_test, f_test, self.epsilon, kernel_mask_test,
                                         diagonal_mask_test, N_data_test.float(), device)
        return var, weights, nmll, nmll_test

    return var, weights, nmll
def set_kernel(self):
    """Cache the train-train kernel matrix as a numpy array."""
    K11 = cal_kern_spec_mix_nomu_sep(self.X_train_torch, self.X_train_torch,
                                     self.sm_params.var, self.sm_params.weights)
    self.kernel = K11.detach().cpu().numpy()
def K(self, X: np.ndarray, x_2: np.ndarray) -> np.ndarray:
    """Kernel matrix between two sets of points, evaluated with the fitted spectral-mixture parameters."""
    X_torch = torch.from_numpy(X).float().to(self.gp_model.device)
    X_torch_2 = torch.from_numpy(x_2).float().to(self.gp_model.device)
    K = cal_kern_spec_mix_nomu_sep(X_torch, X_torch_2,
                                   self.gp_model.sm_params.var, self.gp_model.sm_params.weights)
    return K.detach().cpu().numpy()