def initialize_from_data(self, pi_lim=None, initialize_to_mle=True):
    """
    Initialize ``self.mu`` and every dataset's ``psi`` from the observed data.

    For each entry of ``self.data_list`` the mean ``self.mu`` is reset to a
    zero vector of length ``self.K`` and ``data["psi"]`` is overwritten.

    :param pi_lim: unused; kept so existing callers keep working.
    :param initialize_to_mle: if True, ``psi`` is set to ``ln_pi_to_psi`` of
        the add-one-smoothed empirical probabilities of each row of
        ``data["X"]``, minus ``self.mu``. If False, ``psi`` is set to zeros
        of shape ``(data["M"], self.K)``.
    :return: None
    """
    for data in self.data_list:
        X = data["X"]
        M = data["M"]
        assert X.ndim == 2 and X.shape[1] == self.K

        # The mean is pinned to zero rather than to the empirical mean of psi.
        # It is set inside the loop so that an empty data_list leaves any
        # existing self.mu untouched, as before.
        self.mu = np.zeros(self.K)

        if not initialize_to_mle:
            data["psi"] = np.zeros((M, self.K))
            continue

        # Empirical probabilities, offset by alpha so that no entry is zero.
        # The builtin ``float`` replaces ``np.float``, which was removed in
        # NumPy 1.24.
        alpha = 1.0
        smoothed = (alpha + X).astype(float)
        pi_emp = smoothed / smoothed.sum(axis=1)[:, None]

        # Convert each row of empirical probabilities to psi and store the
        # deviation from the mean.
        psi_emp = np.array([ln_pi_to_psi(p) for p in pi_emp])
        psi_emp -= self.mu
        assert psi_emp.shape == (M, self.K)
        data["psi"] = psi_emp
def fit_gp_multinomial_model(model, test, pi_train=None, N_samples=100, run=1):
    """
    Initialize ``model``, then either load cached results or run the sampler.

    :param model: model object; must provide ``data_list``, ``mu``,
        ``initialize_from_data``, ``log_likelihood``, ``copy_sample``,
        ``predictive_log_likelihood`` and ``resample_model``.
    :param test: held-out dataset; ``get_inputs(test)`` and ``test.data`` are
        passed to ``model.predictive_log_likelihood``.
    :param pi_train: optional probabilities used to initialize
        ``model.data_list[0]["psi"]``; when None the model initializes itself
        from its own data.
    :param N_samples: number of ``resample_model`` iterations to run.
    :param run: run index, used to build the results path
        ``results/names/runNNN/results.pkl.gz``.
    :return: ``(samples, lls, pred_lls, pred_pis, timestamps)``. When the
        results are loaded from an existing results file, ``pred_pis`` is
        ``None`` because the file only stores the other four values.
    """
    if pi_train is not None:
        if isinstance(model, pgmult.gp.LogisticNormalGP):
            model.data_list[0]["psi"] = ln_pi_to_psi(pi_train) - model.mu
        elif isinstance(model, pgmult.gp.MultinomialGP):
            model.data_list[0]["psi"] = pi_to_psi(pi_train) - model.mu
            # NOTE(review): resample_omega is assumed to belong to this
            # branch only -- confirm against the original layout.
            model.resample_omega()
    else:
        model.initialize_from_data()

    ### Inference
    results_base = os.path.join("results", "names", "run%03d" % run, "results")
    results_file = results_base + ".pkl.gz"

    if os.path.exists(results_file):
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # results files produced by a trusted run.
        with gzip.open(results_file, "r") as f:
            samples, lls, pred_lls, timestamps = pickle.load(f)
        # The cache holds no predictive probabilities. Bind the name so the
        # 5-tuple return below does not raise UnboundLocalError.
        pred_pis = None
        return samples, lls, pred_lls, pred_pis, timestamps

    Z_test = get_inputs(test)
    lls = [model.log_likelihood()]
    samples = [model.copy_sample()]
    pred_ll, pred_pi = model.predictive_log_likelihood(Z_test, test.data)
    pred_lls = [pred_ll]
    pred_pis = [pred_pi]
    # The initial state costs no sampling time.
    times = [0]

    # Print initial values
    print("Initial LL: ", lls[0])
    print("Initial Pred LL: ", pred_lls[0])

    for itr in range(N_samples):
        print("Iteration ", itr)

        # Only the resampling step itself is timed.
        tic = time.time()
        model.resample_model(verbose=True)
        times.append(time.time() - tic)

        samples.append(model.copy_sample())
        lls.append(model.log_likelihood())
        # Reuse the test inputs computed once before the loop.
        pred_ll, pred_pi = model.predictive_log_likelihood(Z_test, test.data)
        pred_lls.append(pred_ll)
        pred_pis.append(pred_pi)

        print("Log likelihood: ", lls[-1])
        print("Pred Log likelihood: ", pred_ll)

    lls = np.array(lls)
    pred_lls = np.array(pred_lls)
    # Cumulative sampling time after each iteration.
    timestamps = np.cumsum(times)

    return samples, lls, pred_lls, pred_pis, timestamps
def fit_gp_multinomial_model(model, test, pi_train=None, N_samples=100, run=1):
    """
    Initialize ``model``, then either load cached results or run the sampler.

    :param model: model object; must provide ``data_list``, ``mu``,
        ``initialize_from_data``, ``log_likelihood``, ``copy_sample``,
        ``predictive_log_likelihood`` and ``resample_model``.
    :param test: held-out dataset; ``get_inputs(test)`` and ``test.data`` are
        passed to ``model.predictive_log_likelihood``.
    :param pi_train: optional probabilities used to initialize
        ``model.data_list[0]["psi"]``; when None the model initializes itself
        from its own data.
    :param N_samples: number of ``resample_model`` iterations to run.
    :param run: run index, used to build the results path
        ``results/names/runNNN/results.pkl.gz``.
    :return: ``(samples, lls, pred_lls, pred_pis, timestamps)``. When the
        results are loaded from an existing results file, ``pred_pis`` is
        ``None`` because the file only stores the other four values.
    """
    if pi_train is not None:
        if isinstance(model, pgmult.gp.LogisticNormalGP):
            model.data_list[0]["psi"] = ln_pi_to_psi(pi_train) - model.mu
        elif isinstance(model, pgmult.gp.MultinomialGP):
            model.data_list[0]["psi"] = pi_to_psi(pi_train) - model.mu
            # NOTE(review): resample_omega is assumed to belong to this
            # branch only -- confirm against the original layout.
            model.resample_omega()
    else:
        model.initialize_from_data()

    ### Inference
    results_base = os.path.join("results", "names", "run%03d" % run, "results")
    results_file = results_base + ".pkl.gz"

    if os.path.exists(results_file):
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # results files produced by a trusted run.
        with gzip.open(results_file, "r") as f:
            samples, lls, pred_lls, timestamps = pickle.load(f)
        # The cache holds no predictive probabilities. Bind the name so the
        # 5-tuple return below does not raise UnboundLocalError.
        pred_pis = None
        return samples, lls, pred_lls, pred_pis, timestamps

    Z_test = get_inputs(test)
    lls = [model.log_likelihood()]
    samples = [model.copy_sample()]
    pred_ll, pred_pi = model.predictive_log_likelihood(Z_test, test.data)
    pred_lls = [pred_ll]
    pred_pis = [pred_pi]
    # The initial state costs no sampling time.
    times = [0]

    # Print initial values
    print("Initial LL: ", lls[0])
    print("Initial Pred LL: ", pred_lls[0])

    # ``range`` replaces the Python 2-only ``xrange``.
    for itr in range(N_samples):
        print("Iteration ", itr)

        # Only the resampling step itself is timed.
        tic = time.time()
        model.resample_model(verbose=True)
        times.append(time.time() - tic)

        samples.append(model.copy_sample())
        lls.append(model.log_likelihood())
        # Reuse the test inputs computed once before the loop.
        pred_ll, pred_pi = model.predictive_log_likelihood(Z_test, test.data)
        pred_lls.append(pred_ll)
        pred_pis.append(pred_pi)

        print("Log likelihood: ", lls[-1])
        print("Pred Log likelihood: ", pred_ll)

    lls = np.array(lls)
    pred_lls = np.array(pred_lls)
    # Cumulative sampling time after each iteration.
    timestamps = np.cumsum(times)

    return samples, lls, pred_lls, pred_pis, timestamps
def pi(self, value):
    """
    Store ``value`` on ``self.psi`` after converting it with ``ln_pi_to_psi``.

    NOTE(review): presumably registered as a property setter for ``pi``;
    the decorator is not visible here -- confirm.
    """
    converted = ln_pi_to_psi(value)
    self.psi = converted
def theta(self, theta):
    """
    Store ``theta`` on ``self.psi`` after converting it with ``ln_pi_to_psi``.

    NOTE(review): presumably registered as a property setter for ``theta``;
    the decorator is not visible here -- confirm.
    """
    converted = ln_pi_to_psi(theta)
    self.psi = converted