Example #1
File: gp.py Project: yinsenm/pgmult
    def initialize_from_data(self, pi_lim=None, initialize_to_mle=True):
        """
        Initialize the psi's to the empirical mean of the data
        :return:
        """
        for data in self.data_list:
            # Compute the empirical probability
            X = data["X"]
            M = data["M"]
            assert X.ndim == 2 and X.shape[1] == self.K

            # Get the empirical probabilities (offset by 1 to ensure nonzero)
            alpha = 1.0
            pi_emp = (alpha + X).astype(float) / (alpha + X).sum(axis=1)[:, None]
            pi_emp_mean = pi_emp.mean(axis=0)

            # Compute the empirical mean of pi in psi space (currently unused:
            # mu is simply set to zero)
            psi_emp_mean = ln_pi_to_psi(pi_emp_mean)
            # self.mu = psi_emp_mean
            self.mu = np.zeros(self.K)

            if initialize_to_mle:
                # Convert empirical values to psi
                psi_emp = np.array([ln_pi_to_psi(p) for p in pi_emp])
                psi_emp -= self.mu
                assert psi_emp.shape == (M, self.K)

                # Set psi to the empirical deviations from the mean
                data["psi"] = psi_emp
            else:
                # Otherwise start the deviations from the mean at zero
                data["psi"] = np.zeros((M, self.K))
Example #2
File: census_gp.py Project: HIPS/pgmult
def fit_gp_multinomial_model(model, test, pi_train=None, N_samples=100, run=1):
    if pi_train is not None:
        if isinstance(model, pgmult.gp.LogisticNormalGP):
            model.data_list[0]["psi"] = ln_pi_to_psi(pi_train) - model.mu
        elif isinstance(model, pgmult.gp.MultinomialGP):
            model.data_list[0]["psi"] = pi_to_psi(pi_train) - model.mu
            model.resample_omega()
    else:
        model.initialize_from_data()

    ### Inference
    results_base = os.path.join("results", "names", "run%03d" % run, "results")
    results_file = results_base + ".pkl.gz"
    if os.path.exists(results_file):
        with gzip.open(results_file, "r") as f:
            samples, lls, pred_lls, timestamps = pickle.load(f)
        # The cached file stores only the 4-tuple above; return an empty list
        # for the per-sample pis so the return statement below still works.
        pred_pis = []

    else:
        Z_test = get_inputs(test)
        lls = [model.log_likelihood()]
        samples = [model.copy_sample()]
        pred_ll, pred_pi = model.predictive_log_likelihood(Z_test, test.data)
        pred_lls = [pred_ll]
        pred_pis = [pred_pi]
        times = [0]

        # Print initial values
        print("Initial LL: ", lls[0])
        print("Initial Pred LL: ", pred_lls[0])


        for itr in range(N_samples):
            print("Iteration ", itr)
            tic = time.time()
            model.resample_model(verbose=True)
            times.append(time.time()-tic)

            samples.append(model.copy_sample())
            lls.append(model.log_likelihood())
            pred_ll, pred_pi = model.predictive_log_likelihood(get_inputs(test), test.data)
            pred_lls.append(pred_ll)
            pred_pis.append(pred_pi)

            print("Log likelihood: ", lls[-1])
            print("Pred Log likelihood: ", pred_ll)

            # Save this sample
            # with gzip.open(results_file + ".itr%03d.pkl.gz" % itr, "w") as f:
            #     pickle.dump(model, f, protocol=-1)

        lls = np.array(lls)
        pred_lls = np.array(pred_lls)
        timestamps = np.cumsum(times)

    return samples, lls, pred_lls, pred_pis, timestamps
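
fit_gp_multinomial_model caches its sampler output in a gzipped pickle, but the code that writes that file is not part of this excerpt. The sketch below shows one way such a writer/reader pair could look, producing exactly the 4-tuple that the cached branch above unpacks; the function names and the demo path are hypothetical and not part of pgmult:

import gzip
import os
import pickle

import numpy as np

# Hypothetical cache path for illustration; the real runs write under
# results/names/run%03d/results.pkl.gz.
results_file = os.path.join("results", "demo", "results.pkl.gz")

def save_results(samples, lls, pred_lls, timestamps, path=results_file):
    # Store the same 4-tuple that the cached branch unpacks.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with gzip.open(path, "wb") as f:
        pickle.dump((samples, lls, pred_lls, timestamps), f, protocol=-1)

def load_results(path=results_file):
    with gzip.open(path, "rb") as f:
        return pickle.load(f)

# Round trip with dummy arrays standing in for real sampler output.
save_results([None], np.zeros(2), np.zeros(2), np.zeros(2))
samples, lls, pred_lls, timestamps = load_results()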
Example #3
    def pi(self, value):
        self.psi = ln_pi_to_psi(value)
Example #4
File: lda.py Project: fivejjs/pgmult
    def theta(self, theta):
        self.psi = ln_pi_to_psi(theta)
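
The pi and theta setters in the last two examples only make sense as half of a property pair: the model keeps the unconstrained psi internally and converts on assignment. Here is a self-contained sketch of that pattern; the ln_pi_to_psi / ln_psi_to_pi definitions below are illustrative stand-ins (plain log and softmax), not the actual pgmult.utils implementations, and the SimplexParam class is hypothetical:

import numpy as np

def ln_pi_to_psi(pi):
    # Illustrative inverse map: log probabilities as the unconstrained parameters.
    return np.log(pi)

def ln_psi_to_pi(psi):
    # Illustrative forward map: softmax back onto the simplex.
    e = np.exp(psi - psi.max())
    return e / e.sum()

class SimplexParam(object):
    """Keeps psi as the internal parameterization but exposes pi on the simplex."""

    def __init__(self, K):
        self.psi = np.zeros(K)

    @property
    def pi(self):
        return ln_psi_to_pi(self.psi)

    @pi.setter
    def pi(self, value):
        # Same shape as the setters above: convert and store psi.
        self.psi = ln_pi_to_psi(value)

p = SimplexParam(K=3)
p.pi = np.array([0.2, 0.3, 0.5])
print(p.pi)  # recovers [0.2, 0.3, 0.5] up to floating point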