Example #1
File: alice_lds.py Project: HIPS/pgmult
def compute_singular_vectors(model, words):
    # Compute the left and right singular vectors of the model's
    # dynamics matrix, A, then project them through C to get the
    # corresponding vector psi, which can be transformed into a
    # vector of word probabilities, pi, and sorted.
    from pgmult.utils import psi_to_pi
    A, C, mu = model.A, model.C, model.emission_distn.mu
    U, S, V = np.linalg.svd(A)

    def top_k(k, pi):
        # Get the top k words ranked by pi
        perm = np.argsort(pi)[::-1]
        return words[perm][:k]

    for d in range(min(5, A.shape[0])):
        ud = U[:,d]
        vd = V[d,:]

        psi_ud = C.dot(ud) + mu
        psi_vd = C.dot(vd) + mu

        baseline = psi_to_pi(mu)
        pi_ud = psi_to_pi(psi_ud) - baseline
        pi_vd = psi_to_pi(psi_vd) - baseline

        print("")
        print("Singular vector ", d, " Singular value, ", S[d])
        print("Right: ")
        print(top_k(5, pi_vd))
        print("Left: ")
        print(top_k(5, pi_ud))
Example #2
    def log_likelihood(self):
        ll = 0
        for states in self.states_list:
            psi = states.stateseq.dot(self.C.T) + self.mu
            pi = psi_to_pi(psi)
            ll += np.sum(states.data * np.log(pi))
        return ll
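Note that this likelihood omits the multinomial normalizing constant, which is harmless for inference because it does not depend on the model parameters. Examples #9 and #12 below do include it; a minimal helper in the same spirit (the function name is illustrative, not pgmult's API):

import numpy as np
from scipy.special import gammaln

def multinomial_log_likelihood(X, pi):
    # Full multinomial log pmf: the gammaln terms are the combinatorial
    # normalizer that log_likelihood above drops.
    # X: (T, K) array of counts, pi: (T, K) array of probabilities.
    ll = gammaln(X.sum(axis=1) + 1).sum() - gammaln(X + 1).sum()
    ll += np.nansum(X * np.log(pi))
    return ll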
Example #3
def initialize_test(N_max=10, true_model_class=MultinomialGP):
    D = 1           # Input dimensionality

    M_train = 100   # Number of observed training datapoints
    M_test = 20     # Number of observed test datapoints
    M = M_train + M_test
    l = 10.0        # Length scale of GP
    L = 120.0       # Length of observation sequence
    v = 1.0         # Variance of the GP

    # Initialize a grid of points at which to observe GP
    N = N_max * np.ones(M, dtype=np.int32)
    Z = np.linspace(0,L,M)[:,None]

    # Initialize the kernel
    kernel = RBF(1, lengthscale=l, variance=v)

    # Sample a GP (K, the number of output categories, is a module-level
    # constant in the original script)
    true_model = true_model_class(K, kernel, D=D)
    X, psi = true_model.generate(Z=Z, N=N, full_output=True)
    pi = np.array([psi_to_pi(p) for p in psi])

    # Split the data into training and test
    Dataset = namedtuple("Dataset", ["K", "kernel", "Z", "X", "psi", "pi"])
    train = Dataset(K, kernel, Z[:M_train], X[:M_train], psi[:M_train], pi[:M_train])
    test = Dataset(K, kernel, Z[M_train:], X[M_train:], psi[M_train:], pi[M_train:])

    return train, test
Example #4
File: gp.py Project: HIPS/pgmult
    def generate(self, keep=True, Z=None, N=None, full_output=True):
        assert Z is not None and Z.ndim == 2 and Z.shape[1] == self.D
        M = Z.shape[0]

        assert N.ndim == 1 and N.shape[0] == M and np.all(N >= 1)
        assert np.issubdtype(N.dtype, np.integer)
        N = N.astype(np.int32)

        # Compute the covariance of the Z's
        C = self.kernel.K(Z)

        # Sample from a zero mean GP, N(0, C) for each output, k
        psis = np.zeros((M, self.K-1))
        for k in range(self.K-1):
            # TODO: Reuse the Cholesky
            psis[:,k] = np.random.multivariate_normal(np.zeros(M), C)

        # Add the mean vector
        psis += self.mu[None,:]

        # Sample from the multinomial distribution
        pis = psi_to_pi(psis)
        X = np.array([np.random.multinomial(N[m], pis[m]) for m in range(M)])

        if keep:
            self.add_data(Z, X)

        if full_output:
            return X, psis
        else:
            return X
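The TODO above can be addressed by factoring the covariance once and reusing it for all K-1 outputs. A minimal sketch replacing the sampling loop; the jitter term for numerical stability is my addition, not pgmult's code:

# One Cholesky factorization instead of K-1 calls to
# np.random.multivariate_normal; each column of L.dot(randn) is N(0, C).
L = np.linalg.cholesky(C + 1e-8 * np.eye(M))
psis = self.mu[None, :] + L.dot(np.random.randn(M, self.K - 1))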
Example #5
File: gp.py Project: HIPS/pgmult
    def collapsed_predict(self, Z_new, full_output=True, full_cov=False):
        """
        Predict the multinomial probability vector at a grid of points, Z_new
        by first integrating out the value of psi at the data, Z_test, given
        omega and the kernel parameters.
        """
        assert len(self.data_list) == 1, "Must have one data list in order to predict."
        data = self.data_list[0]
        Z = data["Z"]

        assert Z_new is not None and Z_new.ndim == 2 and Z_new.shape[1] == self.D
        M_new = Z_new.shape[0]

        # Compute the kernels between the training inputs Z and the new inputs Z_new
        C   = self.kernel.K(Z, Z)
        Cnn = self.kernel.K(Z_new, Z_new)
        Cnv = self.kernel.K(Z_new, Z)

        # Predict the psis
        mu_psis_new = np.zeros((self.K-1, M_new))
        Sig_psis_new = np.zeros((self.K-1, M_new, M_new))
        for k in range(self.K-1):
            sys.stdout.write(".")
            sys.stdout.flush()

            # Effectively ignore inputs where N[:,k] == 0
            Omegak = data["omega"][:,k]
            kappak = data["kappa"][:,k]

            # Give invalid points negligible precision (i.e., effectively
            # infinite observation noise)
            Omegak[Omegak == 0] = 1e-16

            # Account for the mean from the omega potentials
            y = kappak/Omegak - self.mu[k]

            # The y's are noisy observations at inputs Z
            # with diagonal covariance Omegak^{-1}
            Cvv_noisy = C + np.diag(1./Omegak)
            Lvv_noisy = np.linalg.cholesky(Cvv_noisy)

            # Compute the conditional mean given noisy observations
            psik_pred = Cnv.dot(dpotrs(Lvv_noisy, y, lower=True)[0])

            # Save these into the combined arrays
            mu_psis_new[k] = psik_pred + self.mu[k]

            if full_cov:
                Sig_psis_new[k] = Cnn - Cnv.dot(dpotrs(Lvv_noisy, Cnv.T, lower=True)[0])

        sys.stdout.write("\n")
        sys.stdout.flush()

        # Convert these to pis
        pis_new = psi_to_pi(mu_psis_new)

        if full_output:
            return pis_new, mu_psis_new, Sig_psis_new
        else:
            return pis_new
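dpotrs is the raw LAPACK routine for solving against an existing Cholesky factor. If the low-level wrapper reads poorly, scipy.linalg.cho_solve is an equivalent higher-level call; a sketch under that assumption:

from scipy.linalg import cho_solve

# Equivalent to dpotrs(Lvv_noisy, y, lower=True)[0]
psik_pred = Cnv.dot(cho_solve((Lvv_noisy, True), y))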
Example #6
def test_psi_pi_conversion():
    K = 10

    pi = np.ones(K) / float(K)
    psi = pi_to_psi(pi)
    pi2 = psi_to_pi(psi)

    print("pi:  ", pi)
    print("psi: ", psi)
    print("pi2: ", pi2)

    assert np.allclose(pi, pi2), "Mapping is not invertible."
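For reference, psi_to_pi and pi_to_psi implement pgmult's stick-breaking map between R^{K-1} and the K-simplex. A simplified single-vector reimplementation that reproduces the round trip tested above; pgmult's actual versions also handle batches, so treat this as a sketch:

import numpy as np

def sigmoid(x):
    return 1. / (1. + np.exp(-x))

def psi_to_pi_ref(psi):
    # pi_k = sigma(psi_k) * (stick mass left after the first k-1 breaks)
    K = psi.shape[0] + 1
    pi = np.zeros(K)
    stick = 1.0
    for k in range(K - 1):
        pi[k] = sigmoid(psi[k]) * stick
        stick -= pi[k]
    pi[-1] = stick                    # remainder goes to the last entry
    return pi

def pi_to_psi_ref(pi):
    # Inverse map: psi_k = logit(pi_k / remaining stick mass)
    psi = np.zeros(len(pi) - 1)
    stick = 1.0
    for k in range(len(pi) - 1):
        p = pi[k] / stick
        psi[k] = np.log(p / (1. - p))
        stick -= pi[k]
    return psi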
Example #7
        def log_joint_C(C):
            ll = 0
            for states in self.states_list:
                z = states.stateseq
                psi = z.dot(C.T) + self.mu
                pi = psi_to_pi(psi)

                # TODO: Generalize for multinomial
                ll += np.nansum(states.data * np.log(pi))

            ll += (-0.5*C**2/self.sigma_C).sum()

            return ll
Example #8
def test_correlated_pgm_rvs(Sigma):
    K = Sigma.shape[0] + 1
    mu, _ = compute_uniform_mean_psi(K)
    print("mu:  ", mu)

    # Sample a bunch of pis and look at the marginals
    samples = 10000
    psis = np.random.multivariate_normal(mu, Sigma, size=samples)
    pis = []
    for smpl in range(samples):
        pis.append(psi_to_pi(psis[smpl]))
    pis = np.array(pis)

    print("E[pi]:   ", pis.mean(axis=0))
    print("var[pi]: ", pis.var(axis=0))

    plt.figure()
    plt.subplot(311)
    plt.boxplot(pis)
    plt.xlabel("k")
    plt.ylabel(r"$p(\pi_k)$")

    # Plot the covariance
    cov = np.cov(pis.T)
    plt.subplot(323)
    plt.imshow(cov[:-1,:-1], interpolation="none", cmap="cool")
    plt.colorbar()
    plt.title(r"Cov($\pi$)")

    plt.subplot(324)
    invcov = np.linalg.inv(cov[:-1,:-1] + np.diag(1e-6 * np.ones(K-1)))
    # good = np.delete(np.arange(K), np.arange(0,K,3))
    # invcov = np.linalg.inv(cov[np.ix_(good,good)])
    plt.imshow(invcov, interpolation="none", cmap="cool")
    plt.colorbar()
    plt.title(r"Cov$(\pi)^{-1}$")

    plt.subplot(325)
    plt.imshow(Sigma, interpolation="none", cmap="cool")
    plt.colorbar()
    plt.title(r"$\Sigma$")

    plt.subplot(326)
    plt.imshow(np.linalg.inv(Sigma), interpolation="none", cmap="cool")
    plt.colorbar()
    plt.title(r"$\Sigma^{-1}$")

    plt.savefig("correlated_psi_pi.png")
    plt.show()
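compute_uniform_mean_psi returns the psi at which the stick-breaking map yields the uniform distribution (along with a reference covariance, not derived here). Under the stick-breaking convention sketched after Example #6, the mean has a closed form; a hedged check against pgmult's own transform:

import numpy as np
from pgmult.utils import psi_to_pi

# For uniform pi, each break must take 1/(K-k+1) of the remaining stick,
# so mu_k = logit(1/(K-k+1)) = -log(K-k) for k = 1, ..., K-1 (1-indexed).
K = 5
mu = -np.log(K - np.arange(1, K))
assert np.allclose(psi_to_pi(mu), 1. / K)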
Example #9
File: gp.py Project: HIPS/pgmult
    def predictive_log_likelihood(self, Z_pred, X_pred):
        """
        Predict the GP value at the inputs Z_pred and evaluate the likelihood of X_pred
        """
        _, mu_pred, Sig_pred = self.collapsed_predict(Z_pred, full_output=True)

        psis = np.array([np.random.multivariate_normal(mu, Sig) for mu,Sig in zip(mu_pred, Sig_pred)])
        pis = psi_to_pi(psis.T)

        pll = 0
        pll += gammaln(X_pred.sum(axis=1)+1).sum() - gammaln(X_pred+1).sum()
        pll += np.nansum(X_pred * np.log(pis))

        return pll, pis
Example #10
    def sample(self, z, x, i,n):
        """ Sample the next state given the previous time index

            :param z:       TxNxD buffer of particle states
            :param x:       TxO output buffer of observations
            :param i:       Time index to sample
            :param n:       Particle index to sample
        """
        psi = np.dot(self.C, z[i,n,:]) + self.mu
        pi = psi_to_pi(psi)

        from pybasicbayes.util.stats import sample_discrete
        s = sample_discrete(pi)
        x[i,:] = 0
        x[i,s] = 1
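sample_discrete is a pybasicbayes helper; with plain NumPy the same categorical draw and one-hot write could be done as below (an illustrative equivalent, not the original code):

# Draw a category index with probabilities pi, then one-hot encode it
s = np.random.choice(len(pi), p=pi)
x[i, :] = 0
x[i, s] = 1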
Example #11
    def logp(self, z, x, i, ll):
        """ Compute the log likelihood, log p(x|z), at time index i and put the
            output in the buffer ll.

            :param z:   TxNxD buffer of latent states
            :param x:   TxO buffer of observations
            :param i:   Time index at which to compute the log likelihood
            :param ll:  N buffer to populate with log likelihoods

            :return:    Buffer ll should be populated with the log likelihood of
                        each particle.
        """
        # psi is N x O
        psi = np.dot(z[i], self.C.T) + self.mu
        pi = psi_to_pi(psi)

        llref = np.nansum(x[i] * np.log(pi), axis=1)
        np.copyto(np.asarray(ll), llref)
Example #12
File: census_gp.py Project: HIPS/pgmult
def compute_pred_likelihood(model, samples, test):
    Z_pred = get_inputs(test)

    preds = []
    for sample in samples:
        model.set_sample(sample)
        preds.append(model.predict(Z_pred, full_output=True)[1])

    psi_pred_mean = np.mean(preds, axis=0)

    if isinstance(model, pgmult.gp.MultinomialGP):
        pi_pred_mean = np.array([psi_to_pi(psi) for psi in psi_pred_mean])
    elif isinstance(model, pgmult.gp.LogisticNormalGP):
        from pgmult.internals.utils import ln_psi_to_pi
        pi_pred_mean = np.array([ln_psi_to_pi(psi) for psi in psi_pred_mean])
    else:
        raise NotImplementedError

    pll_gp = gammaln(test.data.sum(axis=1)+1).sum() - gammaln(test.data+1).sum()
    pll_gp += np.nansum(test.data * np.log(pi_pred_mean))
    return pll_gp
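The LogisticNormalGP branch uses ln_psi_to_pi, which maps psi to the simplex with a softmax rather than stick-breaking. A reference sketch assuming the standard logistic-normal construction; pgmult's implementation details may differ:

import numpy as np

def ln_psi_to_pi_ref(psi):
    # Softmax along the last axis, shifted for numerical stability
    psi = psi - psi.max(axis=-1, keepdims=True)
    e = np.exp(psi)
    return e / e.sum(axis=-1, keepdims=True)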
Example #13
    def predictive_log_likelihood(self, Xtest, data_index=0, Npred=100):
        """
        Hacky way of computing the predictive log likelihood
        :param X_pred:
        :param data_index:
        :param M:
        :return:
        """
        Tpred = Xtest.shape[0]

        # Sample particle trajectories
        preds = self.states_list[data_index].sample_predictions(Tpred, Npred)
        preds = np.transpose(preds, [2,0,1])
        assert preds.shape == (Npred, Tpred, self.n)

        psis = np.array([pred.dot(self.C.T) + self.mu for pred in preds])
        pis = np.array([psi_to_pi(psi) for psi in psis])

        # TODO: Generalize for multinomial
        lls = np.zeros(Npred)
        for m in range(Npred):
            # lls[m] = np.sum(
            #     [Multinomial(weights=pis[m,t,:], K=self.p).log_likelihood(Xtest[t][None,:])
            #      for t in xrange(Tpred)])
            lls[m] = np.nansum(Xtest * np.log(pis[m]))

        # Compute the average
        hll = logsumexp(lls) - np.log(Npred)

        # Use bootstrap to compute error bars
        samples = np.random.choice(lls, size=(100, Npred), replace=True)
        hll_samples = logsumexp(samples, axis=1) - np.log(Npred)
        std_hll = hll_samples.std()

        return hll, std_hll
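For reference, logsumexp(lls) - np.log(Npred) is a numerically stable evaluation of log((1/Npred) * sum_m exp(lls[m])), i.e. the log of the Monte Carlo average of the per-trajectory predictive likelihoods; the bootstrap then resamples lls with replacement to attach an error bar to that estimate.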
Example #14
File: lda.py Project: fivejjs/pgmult
    def theta(self):
        return psi_to_pi(self.psi)
Example #15
File: lda.py Project: fivejjs/pgmult
    def beta(self):
        return psi_to_pi(self.psi, axis=1)
Example #16
File: census_gp.py Project: HIPS/pgmult
def plot_spatial_distribution(train, samples, name="Ethan", year=2000):
    # Extract the sampled mus and psis
    mus = np.array([s[0] for s in samples])
    psis = np.array([s[1][0][0] for s in samples])
    # omegas = np.array([s[1][0][1] for s in samples])

    # Adjust psis by the mean and compute the inferred pis
    psis += mus[0][None,None,:]
    pis = np.array([psi_to_pi(psi_sample) for psi_sample in psis])

    # Extract single name, year data
    data = pis[-1, train.years==year, train.names == name.lower()]
    lons = train.lon[train.years == year]
    lats = train.lat[train.years == year]

    fig = plt.figure(figsize=(3,3))
    ax = fig.add_subplot(111, aspect="equal")

    from mpl_toolkits.basemap import Basemap
    m = Basemap(width=6000000, height=3500000,
                resolution='l',projection='stere',
                lat_ts=50,lat_0=40,lon_0=-100.,
                ax=ax)
    land_color  = [.98, .98, .98]
    water_color = [.75, .75, .75]
    # water_color = [1., 1., 1.]
    m.fillcontinents(color=land_color, lake_color=water_color)
    m.drawcoastlines()
    m.drawstates()
    m.drawcountries()
    m.drawmapboundary(fill_color=water_color)

    # Convert data lons and data lats to map coordinates
    dx, dy = m(lons, lats)

    # Interpolate at a grid of points
    glons, glats = m.makegrid(100, 100)
    gx, gy = m(glons, glats)
    M = gx.size

    # Interpolate
    from scipy.interpolate import griddata
    gdata = griddata(np.hstack((dx[:,None], dy[:,None])),
                     data,
                     np.hstack((gx.reshape((M,1)), gy.reshape((M,1)))),
                     method="cubic")
    gdata = gdata.reshape(gx.shape)

    # Plot the contour
    cs = ax.contour(gx, gy, gdata, 15, cmap="Reds", linewidths=2)
    plt.title("%s (%d)" % (name, year))

    from mpl_toolkits.axes_grid1 import make_axes_locatable
    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="3%", pad=0.05)
    cbar = plt.colorbar(cs, cax=cax)
    cbar.set_label("Probability", labelpad=10)

    plt.subplots_adjust(left=0.05, bottom=0.1, top=0.9, right=0.85)
    fig.savefig("%s_%d_geo.pdf" % (name.lower(), year))

    return fig, ax, m
Example #17
File: gp.py Project: HIPS/pgmult
    def collapsed_predict(self, Z_test):
        psi_pred, psi_pred_var = self.model.predict(Z_test, full_cov=False)
        psi_pred += self.mu

        pi_pred = np.array([psi_to_pi(psi) for psi in psi_pred])
        return pi_pred, psi_pred, psi_pred_var
Example #18
File: dna_lds.py Project: yinsenm/pgmult
def plot_qualitative_results(X, key, psi_lds, z_lds):
    # K (number of symbols) and D (latent state dimension) are module-level
    # constants in the original script
    start = 50
    stop = 70

    # Get the corresponding protein labels
    import operator
    id_to_char = dict([(v, k) for k, v in list(key.items())])
    sorted_chars = [
        idc[1].upper()
        for idc in sorted(list(id_to_char.items()), key=operator.itemgetter(0))
    ]
    X_inds = np.where(X)[1]
    prot_str = [id_to_char[v].upper() for v in X_inds]

    from pgmult.utils import psi_to_pi
    pi_lds = psi_to_pi(psi_lds)

    # Plot the true and inferred states
    fig = create_figure(figsize=(3., 3.1))

    # Plot the string of protein labels
    # ax1 = create_axis_at_location(fig, 0.5, 2.5, 2.25, 0.25)
    # for n in xrange(start, stop):
    #     ax1.text(n, 0.5, prot_str[n].upper())
    # # ax1.get_xaxis().set_visible(False)
    # ax1.axis("off")
    # ax1.set_xlim([start-1,stop])
    # ax1.set_title("Protein Sequence")

    # ax2 = create_axis_at_location(fig, 0.5, 2.25, 2.25, 0.5)
    # ax2 = fig.add_subplot(311)
    # plt.imshow(X[start:stop,:].T, interpolation="none", vmin=0, vmax=1, cmap="Blues", aspect="auto")
    # ax2.set_title("One-hot Encoding")

    # ax3 = create_axis_at_location(fig, 0.5, 1.25, 2.25, 0.5)
    ax3 = fig.add_subplot(211)
    im3 = plt.imshow(np.kron(pi_lds[start:stop, :].T, np.ones((50, 50))),
                     interpolation="none",
                     vmin=0,
                     vmax=1,
                     cmap="Blues",
                     aspect="auto",
                     extent=(0, stop - start, K + 1, 1))
    # Circle true symbol
    from matplotlib.patches import Rectangle
    for n in range(start, stop):
        ax3.add_patch(
            Rectangle((n - start, X_inds[n] + 1),
                      1,
                      1,
                      facecolor="none",
                      edgecolor="k"))

    # Print protein labels on y axis
    # ax3.set_yticks(np.arange(K))
    # ax3.set_yticklabels(sorted_chars)

    # Print protein sequence as xticks
    ax3.set_xticks(0.5 + np.arange(0, stop - start))
    ax3.set_xticklabels(prot_str[start:stop])
    ax3.xaxis.tick_top()
    ax3.xaxis.set_tick_params(width=0)

    ax3.set_yticks(0.5 + np.arange(1, K + 1, 5))
    ax3.set_yticklabels(np.arange(1, K + 1, 5))
    ax3.set_ylabel("$k$")

    ax3.set_title("Inferred Protein Probability", y=1.25)

    # Add a colorbar
    from mpl_toolkits.axes_grid1 import make_axes_locatable
    divider = make_axes_locatable(ax3)
    cax = divider.append_axes("right", size="3%", pad=0.05)
    cbar = plt.colorbar(im3, cax=cax, ticks=[0, 0.25, 0.5, 0.75, 1])
    cbar.set_label("Probability", labelpad=10)

    # ax4 = create_axis_at_location(fig, 0.5, 0.5, 2.25, 0.55)
    lim = np.amax(abs(z_lds[start:stop]))
    ax4 = fig.add_subplot(212)
    im4 = plt.imshow(np.kron(z_lds[start:stop, :].T, np.ones((50, 50))),
                     interpolation="none",
                     vmin=-lim,
                     vmax=lim,
                     cmap="RdBu",
                     extent=(0, stop - start, D + 1, 1))
    ax4.set_xlabel("Position $t$")
    ax4.set_yticks(0.5 + np.arange(1, D + 1))
    ax4.set_yticklabels(np.arange(1, D + 1))
    ax4.set_ylabel("$d$")

    ax4.set_title("Latent state sequence")

    # Add a colorbar
    from mpl_toolkits.axes_grid1 import make_axes_locatable
    divider = make_axes_locatable(ax4)
    cax = divider.append_axes("right", size="3%", pad=0.05)
    # cbar_ticks = np.round(np.linspace(-lim, lim, 3))
    cbar_ticks = [-4, 0, 4]
    cbar = plt.colorbar(im4, cax=cax, ticks=cbar_ticks)
    # cbar.set_label("Probability", labelpad=10)

    # plt.subplots_adjust(top=0.9)
    # plt.tight_layout(pad=0.2)
    plt.savefig("dna_lds_1.png")
    plt.savefig("dna_lds_1.pdf")
    plt.show()
Example #19
    theta_mean = thetas.mean(0)
    theta_std  = thetas.std(0)

    betas = np.array(betas)
    beta_mean = betas.mean(0)
    beta_std  = betas.std(0)

    # Now sample from the prior for comparison
    print("Sampling from prior")
    from pybasicbayes.distributions import GaussianFixedMean
    from pgmult.utils import compute_uniform_mean_psi, psi_to_pi
    mu, sigma0 = compute_uniform_mean_psi(T)
    psis_prior = np.array(
        [GaussianFixedMean(mu=mu, lmbda_0=T * sigma0, nu_0=T).rvs(1)
         for _ in range(N_iter)])
    thetas_prior = psi_to_pi(psis_prior[:,0,:])
    betas_prior = np.random.dirichlet(alpha_beta*np.ones(V), size=(N_iter,))

    # print "Mean psi: ", psi_mean, " +- ", psi_std

    import pybasicbayes.util.general as general
    percentilecutoff = 5
    def plot_1d_scaled_quantiles(p1,p2,plot_midline=True):
        # scaled quantiles so that multiple calls line up
        p1.sort(), p2.sort() # NOTE: destructive! but that's cool
        xmin,xmax = general.scoreatpercentile(p1,percentilecutoff), \
                    general.scoreatpercentile(p1,100-percentilecutoff)
        ymin,ymax = general.scoreatpercentile(p2,percentilecutoff), \
                    general.scoreatpercentile(p2,100-percentilecutoff)
        plt.plot((p1-xmin)/(xmax-xmin),(p2-ymin)/(ymax-ymin))
Example #20
    def pi(self):
        psi = self.stateseq.dot(self.C.T) + self.mu
        return psi_to_pi(psi)
Example #21
File: census_gp.py Project: HIPS/pgmult
def plot_census_results(train, samples, test, test_pis):
    # Extract the sampled mus and psis
    train_mus = np.array([s[0] for s in samples])
    train_psis = np.array([s[1][0][0] for s in samples])
    # omegas = np.array([s[1][0][1] for s in samples])

    # Adjust psis by the mean and compute the inferred pis
    train_psis += train_mus[0][None,None,:]
    train_pis = np.array([psi_to_pi(psi_sample) for psi_sample in train_psis])
    train_pi_mean = np.mean(train_pis, axis=0)
    train_pi_std = np.std(train_pis, axis=0)

    # Compute test pi mean and std
    test_pi_mean = np.mean(test_pis, axis=0)
    test_pi_std = np.std(test_pis, axis=0)

    # Compute empirical probabilities
    train_pi_emp = train.data / train.data.sum(axis=1)[:,None]
    test_pi_emp = test.data / test.data.sum(axis=1)[:,None]


    # Plot the temporal trajectories for a few names
    names = ["Scott", "Matthew", "Ethan"]
    states = ["NY", "TX", "WA"]
    linestyles = ["-", "--", ":"]

    fig = create_figure(figsize=(3., 3))
    ax1 = create_axis_at_location(fig, 0.6, 0.5, 2.25, 1.75)
    for name, color in zip(names, colors):
        for state, linestyle in zip(states, linestyles):
            train_state_inds = (train.states == state)
            train_name_ind = np.array(train.names) == name.lower()
            train_years = train.years[train.states == state]
            train_mean_name = train_pi_mean[train_state_inds, train_name_ind]
            train_std_name = train_pi_std[train_state_inds, train_name_ind]

            test_state_inds = (test.states == state)
            test_name_ind = np.array(test.names) == name.lower()
            test_years = test.years[test.states == state]
            test_mean_name = test_pi_mean[test_state_inds, test_name_ind]
            test_std_name = test_pi_std[test_state_inds, test_name_ind]

            years = np.concatenate((train_years, test_years))
            mean_name = np.concatenate((train_mean_name, test_mean_name))
            std_name = np.concatenate((train_std_name, test_std_name))

            # Sausage plot
            sausage_plot(years, mean_name, std_name,
                         color=color, alpha=0.5)

            # Plot inferred mean
            plt.plot(years, mean_name,
                     color=color, label="%s, %s" % (name, state),
                     ls=linestyle, lw=2)

            # Plot empirical probabilities
            plt.plot(train.years[train_state_inds],
                     train_pi_emp[train_state_inds, train_name_ind],
                     color=color,
                     ls="", marker="x", markersize=4)

            plt.plot(test.years[test_state_inds],
                     test_pi_emp[test_state_inds, test_name_ind],
                     color=color,
                     ls="", marker="x", markersize=4)

    # Plot a vertical line to divide train and test
    ylim = plt.gca().get_ylim()
    plt.plot((test.years.min()-0.5) * np.ones(2), ylim, ':k', lw=0.5)
    plt.ylim(ylim)

    # plt.legend(loc="outside right")
    plt.legend(bbox_to_anchor=(0., 1.05, 1., .105), loc=3,
               ncol=len(names), mode="expand", borderaxespad=0.,
               fontsize="x-small")

    plt.xlabel("Year")
    plt.xlim(train.years.min(), test.years.max()+0.1)
    plt.ylabel("Probability")

    # plt.tight_layout()
    fig.savefig("census_gp_rates.pdf")

    plt.show()
    plt.pause(0.1)
Example #22
File: gp.py Project: HIPS/pgmult
    def pi(self, augmented_data):
        psi = self.psi(augmented_data)
        pi = psi_to_pi(psi)

        return pi
Example #23
File: dna_lds.py Project: fivejjs/pgmult
def plot_qualitative_results(X, key, psi_lds, z_lds):
    # As in Example #18, K and D are module-level constants in the original script
    start = 50
    stop = 70

    # Get the corresponding protein labels
    import operator
    id_to_char = dict([(v,k) for k,v in key.items()])
    sorted_chars = [idc[1].upper() for idc in sorted(id_to_char.items(), key=operator.itemgetter(0))]
    X_inds = np.where(X)[1]
    prot_str = [id_to_char[v].upper() for v in X_inds]


    from pgmult.utils import psi_to_pi
    pi_lds = psi_to_pi(psi_lds)

    # Plot the true and inferred states
    fig = create_figure(figsize=(3., 3.1))

    # Plot the string of protein labels
    # ax1 = create_axis_at_location(fig, 0.5, 2.5, 2.25, 0.25)
    # for n in xrange(start, stop):
    #     ax1.text(n, 0.5, prot_str[n].upper())
    # # ax1.get_xaxis().set_visible(False)
    # ax1.axis("off")
    # ax1.set_xlim([start-1,stop])
    # ax1.set_title("Protein Sequence")

    # ax2 = create_axis_at_location(fig, 0.5, 2.25, 2.25, 0.5)
    # ax2 = fig.add_subplot(311)
    # plt.imshow(X[start:stop,:].T, interpolation="none", vmin=0, vmax=1, cmap="Blues", aspect="auto")
    # ax2.set_title("One-hot Encoding")

    # ax3 = create_axis_at_location(fig, 0.5, 1.25, 2.25, 0.5)
    ax3 = fig.add_subplot(211)
    im3 = plt.imshow(np.kron(pi_lds[start:stop,:].T, np.ones((50,50))),
                             interpolation="none", vmin=0, vmax=1, cmap="Blues", aspect="auto",
               extent=(0,stop-start,K+1,1))
    # Circle true symbol
    from matplotlib.patches import Rectangle
    for n in range(start, stop):
        ax3.add_patch(Rectangle((n-start, X_inds[n]+1), 1, 1, facecolor="none", edgecolor="k"))

    # Print protein labels on y axis
    # ax3.set_yticks(np.arange(K))
    # ax3.set_yticklabels(sorted_chars)

    # Print protein sequence as xticks
    ax3.set_xticks(0.5+np.arange(0, stop-start))
    ax3.set_xticklabels(prot_str[start:stop])
    ax3.xaxis.tick_top()
    ax3.xaxis.set_tick_params(width=0)

    ax3.set_yticks(0.5+np.arange(1,K+1, 5))
    ax3.set_yticklabels(np.arange(1,K+1, 5))
    ax3.set_ylabel("$k$")

    ax3.set_title("Inferred Protein Probability", y=1.25)

    # Add a colorbar
    from mpl_toolkits.axes_grid1 import make_axes_locatable
    divider = make_axes_locatable(ax3)
    cax = divider.append_axes("right", size="3%", pad=0.05)
    cbar = plt.colorbar(im3, cax=cax, ticks=[0, 0.25, 0.5, 0.75, 1])
    cbar.set_label("Probability", labelpad=10)


    # ax4 = create_axis_at_location(fig, 0.5, 0.5, 2.25, 0.55)
    lim = np.amax(abs(z_lds[start:stop]))
    ax4 = fig.add_subplot(212)
    im4 = plt.imshow(np.kron(z_lds[start:stop, :].T, np.ones((50,50))),
                     interpolation="none", vmin=-lim, vmax=lim, cmap="RdBu",
                     extent=(0,stop-start, D+1,1))
    ax4.set_xlabel("Position $t$")
    ax4.set_yticks(0.5+np.arange(1,D+1))
    ax4.set_yticklabels(np.arange(1,D+1))
    ax4.set_ylabel("$d$")

    ax4.set_title("Latent state sequence")

    # Add a colorbar
    from mpl_toolkits.axes_grid1 import make_axes_locatable
    divider = make_axes_locatable(ax4)
    cax = divider.append_axes("right", size="3%", pad=0.05)
    # cbar_ticks = np.round(np.linspace(-lim, lim, 3))
    cbar_ticks = [-4, 0, 4]
    cbar = plt.colorbar(im4, cax=cax,  ticks=cbar_ticks)
    # cbar.set_label("Probability", labelpad=10)


    # plt.subplots_adjust(top=0.9)
    # plt.tight_layout(pad=0.2)
    plt.savefig("dna_lds_1.png")
    plt.savefig("dna_lds_1.pdf")
    plt.show()