Example #1
def svi_example(true_model, true_data):
    X, mask = true_data.X, true_data.mask
    N = X.shape[0]

    # Fit a test model
    model = FactorAnalysis(
        D_obs, D_latent,
        # W=true_model.W, sigmasq=true_model.sigmasq
        )

    # Add the data in minibatches
    minibatchsize = 250
    for start in range(0, N, minibatchsize):
        end = min(start + minibatchsize, N)
        model.add_data(X[start:end], mask=mask[start:end])

    lps = []
    angles = []
    N_iters = 100
    delay = 10.0
    forgetting_rate = 0.75
    stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
    for itr in progprint_xrange(N_iters):
        lps.append(model.meanfield_sgdstep(stepsize[itr]))
        E_W, _, _, _ = model.regression.mf_expectations
        angles.append(principal_angle(true_model.W, E_W))

    Z_inf = model.data_list[0].E_Z
    Z_true = true_data.Z[:Z_inf.shape[0]]
    plot_results(lps, angles, Z_true, Z_inf)
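A note on the schedule above: with delay = 10.0 and forgetting_rate = 0.75, the stepsizes follow the standard Robbins-Monro form rho_t = (t + delay)**(-forgetting_rate), which satisfies sum(rho_t) = inf and sum(rho_t**2) < inf for forgetting_rate in (0.5, 1]. A minimal standalone sketch with the values copied from the example:

import numpy as np

delay, forgetting_rate, N_iters = 10.0, 0.75, 100
stepsize = (np.arange(N_iters) + delay) ** (-forgetting_rate)
# Decays from 10**-0.75 ~ 0.178 down to 109**-0.75 ~ 0.030
print(stepsize[0], stepsize[-1])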
Example #2
File: ap_lds.py Project: fivejjs/pgmult
def fit_hmm(Xs, Xtest, D_hmm, N_samples=100):
    print("Fitting HMM with %d states" % D_hmm)
    model = MultinomialHMM(K, D_hmm, alpha_0=10.0)

    for X in Xs:
        model.add_data(X)

    compute_pred_ll = lambda: sum([model.log_likelihood(np.vstack((Xs[i], Xt)))
                                   - model.log_likelihood(Xs[i])
                                   for i, Xt in enumerate(Xtest)])

    init_results = (0, None, model.log_likelihood(),
                    model.log_likelihood(Xtest),
                    compute_pred_ll())

    def resample():
        tic = time.time()
        model.resample_model()
        toc = time.time() - tic

        return toc, None, model.log_likelihood(), \
            np.nan, \
            compute_pred_ll()

    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] +
            [resample() for _ in progprint_xrange(N_samples, perline=5)])))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
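The compute_pred_ll lambda above scores held-out continuations via the chain rule, log p(Xtest | Xtrain) = log p(Xtrain, Xtest) - log p(Xtrain). The same pattern restated as a named helper (a sketch; it only assumes a model exposing log_likelihood(X) on a single sequence, as MultinomialHMM does above):

import numpy as np

def predictive_log_likelihood(model, X_train, X_test):
    # log p(X_test | X_train) = log p(X_train, X_test) - log p(X_train)
    X_full = np.vstack((X_train, X_test))
    return model.log_likelihood(X_full) - model.log_likelihood(X_train)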
Example #3
def svi_example(true_model, X, Z_true, mask):
    # Fit a test model
    model = FactorAnalysis(
        D_obs, D_latent,
        # W=true_model.W, sigmasq=true_model.sigmasq
        )

    # Add the data in minibatches
    N = X.shape[0]
    minibatchsize = 200
    prob = minibatchsize / float(N)

    lps = []
    angles = []
    N_iters = 100
    delay = 10.0
    forgetting_rate = 0.75
    stepsize = (np.arange(N_iters) + delay)**(-forgetting_rate)
    for itr in progprint_xrange(N_iters):
        minibatch = np.random.permutation(N)[:minibatchsize]
        X_mb, mask_mb = X[minibatch], mask[minibatch]
        lps.append(model.meanfield_sgdstep(X_mb, prob, stepsize[itr], masks=mask_mb))
        E_W, _, _, _ = model.regression.mf_expectations
        angles.append(principal_angle(true_model.W, E_W))

    # Compute the expected states for the first minibatch of data
    model.add_data(X, mask)
    statesobj = model.data_list.pop()
    statesobj.meanfieldupdate()
    Z_inf = statesobj.E_Z
    plot_results(lps, angles, Z_true, Z_inf)
Example #4
def fit(name, model, test_data, N_iter=1000, init_state_seq=None):
    def evaluate(model):
        ll = model.log_likelihood()
        pll = model.log_likelihood(test_data)
        N_used = len(model.used_states)
        trans = model.trans_distn
        alpha = trans.alpha
        gamma = trans.gamma if hasattr(trans, "gamma") else None
        rates = model.rates.copy()
        obs_hypers = model.obs_hypers
        # print 'N_states: {}, \tPLL:{}\n'.format(len(model.used_states), pll),
        return ll, pll, N_used, alpha, gamma, rates, obs_hypers

    def sample(model):
        tic = time.time()
        model.resample_model()
        timestep = time.time() - tic
        return evaluate(model), timestep

    # Initialize with given state seq
    if init_state_seq is not None:
        model.states_list[0].stateseq = init_state_seq
        for _ in xrange(100):
            model.resample_obs_distns()

    init_val = evaluate(model)
    vals, timesteps = zip(*[sample(model) for _ in progprint_xrange(N_iter)])

    lls, plls, N_used, alphas, gammas, rates, obs_hypers = \
        zip(*((init_val,) + vals))
    timestamps = np.cumsum((0.,) + timesteps)

    return Results(name, lls, plls, N_used, alphas, gammas,
                   rates, obs_hypers,
                   model.copy_sample(), timestamps)
Example #5
def fit_hmm(Xs, Xtest, N_samples=100):
    model = MultinomialHMM(K, D)

    for X in Xs:
        model.add_data(X)

    samples = []
    lls = []
    test_lls = []
    pis = []
    zs = []
    timestamps = [time.time()]
    for smpl in progprint_xrange(N_samples):
        model.resample_model()
        timestamps.append(time.time())

        samples.append(model.copy_sample())
        # TODO: Use log_likelihood() to marginalize over z
        lls.append(model.log_likelihood())
        # lls.append(model.log_likelihood_fixed_z())
        test_lls.append(model.log_likelihood(Xtest))
        # pis.append(testmodel.pis()[0])
        zs.append(model.stateseqs[0])

    lls = np.array(lls)
    test_lls = np.array(test_lls)
    pis = np.array(pis)
    zs = np.array(zs)
    timestamps = np.array(timestamps)
    timestamps -= timestamps[0]

    return model, lls, test_lls, pis, zs, timestamps
Example #6
def fit_hmm(Xs, Xtest, D_hmm, N_samples=100):
    Nx = len(Xs)
    assert len(Xtest) == Nx

    print("Fitting HMM with %d states" % D_hmm)
    models = [MultinomialHMM(K, D_hmm, alpha_0=10.0) for _ in xrange(Nx)]

    for X, model in zip(Xs, models):
        model.add_data(X)

    def compute_pred_ll():
        pred_ll = 0
        for Xtr, Xte, model in zip(Xs, Xtest, models):
            pred_ll += model.log_likelihood(np.vstack((Xtr, Xte))) - model.log_likelihood(Xtr)
        return pred_ll

    init_results = (0, None, np.nan,  np.nan,  compute_pred_ll())

    def resample():
        tic = time.time()
        [model.resample_model() for model in models]
        toc = time.time() - tic

        return toc, None, np.nan, np.nan, compute_pred_ll()

    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] +
            [resample() for _ in progprint_xrange(N_samples, perline=5)])))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
Example #7
File: ctm.py Project: ariddell/pgmult
def train_model(model, train_data, test_data, N_samples=300, method='resample_model', thetas=None):
    print('Training %s with %s' % (model.__class__.__name__, method))
    model.add_data(train_data)

    # Initialize to a given set of thetas
    if thetas is not None:
        model.thetas = thetas
        for d in model.documents:
            d.resample_z()

    init_like, init_perp, init_sample, init_time = \
        model.log_likelihood(), model.perplexity(test_data), \
        model.copy_sample(), time.time()

    def update(i):
        operator.methodcaller(method)(model)
        # print "ll: ", model.log_likelihood()
        return model.log_likelihood(), \
               model.perplexity(test_data), \
               model.copy_sample(), \
               time.time()

    likes, perps, samples, timestamps = zip(*[update(i) for i in progprint_xrange(N_samples,perline=5)])

    # Get relative timestamps
    timestamps = np.array((init_time,) + timestamps)
    timestamps -= timestamps[0]

    return Results((init_like,) + likes,
                   (init_perp,) + perps,
                   (init_sample,) + samples,
                   timestamps)
Example #8
File: dna_lds.py Project: fivejjs/pgmult
def fit_hmm(Xs, Xtest, D_hmm, N_samples=100):
    print("Fitting HMM with %d states" % D_hmm)
    model = MultinomialHMM(K, D_hmm)

    for X in Xs:
        model.add_data(X)

    init_results = (0, None, model.log_likelihood(),
                    model.log_likelihood(Xtest),
                    (model.log_likelihood(np.vstack((Xs[0], Xtest))) - model.log_likelihood(Xs[0])))

    def resample():
        tic = time.time()
        model.resample_model()
        toc = time.time() - tic

        return toc, None, model.log_likelihood(), \
            model.log_likelihood(Xtest), \
            (model.log_likelihood(np.vstack((Xs[0], Xtest))) - model.log_likelihood(Xs[0]))

    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] + [resample() for _ in progprint_xrange(N_samples)])))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
Example #9
File: ctm.py Project: fivejjs/pgmult
    def fit(train_data, test_data, T, Niter, init_at_em, *args):
        resample = operator.methodcaller(method)

        def evaluate(model):
            ll, pll, perp = \
                model.log_likelihood(), model.log_likelihood(test_data), \
                model.perplexity(test_data)
            return ll, pll, perp

        def sample(model):
            tic = time.time()
            resample(model)
            timestep = time.time() - tic
            return evaluate(model), timestep

        print('Running %s...' % name)
        model = cls(train_data, T, *args)
        model = initializer(model) if init_at_em and initializer else model
        init_val = evaluate(model)
        vals, timesteps = zip(*[sample(model) for _ in progprint_xrange(Niter)])

        lls, plls, perps = zip(*((init_val,) + vals))
        timestamps = np.cumsum((0.,) + timesteps)

        return Results(lls, plls, perps, model.copy_sample(), timestamps)
Example #10
File: dna_lds.py Project: fivejjs/pgmult
def fit_lds_model(Xs, Xtest, D, N_samples=100):
    model = MultinomialLDS(K, D,
        init_dynamics_distn=GaussianFixed(mu=np.zeros(D), sigma=1*np.eye(D)),
        dynamics_distn=AutoRegression(nu_0=D+1,S_0=1*np.eye(D),M_0=np.zeros((D,D)),K_0=1*np.eye(D)),
        sigma_C=0.01
        )

    for X in Xs:
        model.add_data(X)

    model.resample_parameters()

    init_results = (0, model, model.log_likelihood(),
                    model.heldout_log_likelihood(Xtest, M=1),
                    model.predictive_log_likelihood(Xtest, M=1000))

    def resample():
        tic = time.time()
        model.resample_model()
        toc = time.time() - tic

        return toc, None, model.log_likelihood(), \
            model.heldout_log_likelihood(Xtest, M=1), \
            model.predictive_log_likelihood(Xtest, M=1000)

    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] + [resample() for _ in progprint_xrange(N_samples)])))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
Example #11
File: models.py Project: sheqi/pyglm
    def ais(self, N_samples=100, B=1000, steps_per_B=1,
            verbose=True, full_output=False, callback=None):
        """
        Since Gibbs sampling as a function of temperature is implemented,
        we can use AIS to approximate the marginal likelihood of the model.
        """
        # We use a linear schedule by default
        betas = np.linspace(0, 1, B)

        print "Estimating marginal likelihood with AIS"
        lw = np.zeros(N_samples)
        for m in progprint_xrange(N_samples):
            # Initialize the model with a draw from the prior
            self.initialize_from_prior()

            # Keep track of the log of the m-th weight
            # It starts at zero because the prior is assumed to be normalized
            lw[m] = 0.0

            # Sample the intermediate distributions
            for b in xrange(1,B):
                if verbose:
                    sys.stdout.write("M: %d\tBeta: %.3f \r" % (m,betas[b]))
                    sys.stdout.flush()

                # Compute the ratio of this sample under this distribution
                # and the previous distribution. The difference is added
                # to the log weight
                curr_lp = self.log_probability(temperature=betas[b])
                prev_lp = self.log_probability(temperature=betas[b-1])
                lw[m] += curr_lp - prev_lp

                # Sample the model at temperature betas[b]
                # Take some number of steps per beta in hopes that
                # the Markov chain will reach equilibrium.
                for s in range(steps_per_B):
                    self.collapsed_resample_model(temperature=betas[b])

                # Call the given callback
                if callback:
                    callback(self, m, b)

            if verbose:
                print ""
                print "W: %f" % lw[m]


        # Compute the mean of the weights to get an estimate of the normalization constant
        log_Z = -np.log(N_samples) + logsumexp(lw)

        # Use bootstrap to compute standard error
        subsamples = np.random.choice(lw, size=(100, N_samples), replace=True)
        log_Z_subsamples = logsumexp(subsamples, axis=1) - np.log(N_samples)
        std_log_Z = log_Z_subsamples.std()

        if full_output:
            return log_Z, std_log_Z, lw
        else:
            return log_Z, std_log_Z
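The closing lines of ais() implement the standard AIS estimate of the normalization constant, log Z ~ logsumexp(lw) - log M, with a bootstrap standard error over the per-run log weights. Isolated as a standalone sketch (the function name is hypothetical):

import numpy as np
from scipy.special import logsumexp

def ais_log_Z(lw, n_boot=100):
    # lw: per-run AIS log weights, as accumulated in the loop above
    M = len(lw)
    log_Z = logsumexp(lw) - np.log(M)
    boot = np.random.choice(lw, size=(n_boot, M), replace=True)
    log_Z_boot = logsumexp(boot, axis=1) - np.log(M)
    return log_Z, log_Z_boot.std()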
Example #12
def fit_gaussian_lds_model(Xs, Xtest, D_gauss_lds, N_samples=100):
    Nx = len(Xs)
    assert len(Xtest) == Nx

    print("Fitting Gaussian (Raw) LDS with %d states" % D_gauss_lds)
    from pylds.models import NonstationaryLDS
    models = [NonstationaryLDS(
                init_dynamics_distn=GaussianFixed(mu=np.zeros(D), sigma=1*np.eye(D)),
                dynamics_distn=AutoRegression(nu_0=D+1,S_0=1*np.eye(D),M_0=np.zeros((D,D)),K_0=1*np.eye(D)),
                emission_distn=Regression(nu_0=K+1,S_0=K*np.eye(K),M_0=np.zeros((K,D)),K_0=K*np.eye(D)))
              for _ in xrange(Nx)]

    Xs_centered = [X - np.mean(X, axis=0)[None,:] + 1e-3*np.random.randn(*X.shape) for X in Xs]
    for X, model in zip(Xs_centered, models):
        model.add_data(X)

    def compute_pred_ll():
        pred_ll = 0
        for Xtr, Xte, model in zip(Xs_centered, Xtest, models):
            # Monte Carlo sample to get pi density implied by Gaussian LDS
            Npred = 10
            Tpred = Xte.shape[0]
            preds = model.sample_predictions(Xtr, Tpred, Npred=Npred)

            # Convert predictions to a distribution by finding the
            # largest dimension for each predicted Gaussian.
            # Preds is T x K x Npred, inds is TxNpred
            inds = np.argmax(preds, axis=1)
            pi = np.array([np.bincount(inds[t], minlength=K) for t in xrange(Tpred)]) / float(Npred)
            assert np.allclose(pi.sum(axis=1), 1.0)

            pi = np.clip(pi, 1e-8, 1.0)
            pi /= pi.sum(axis=1)[:,None]

            # Compute the log likelihood under pi
            pred_ll += np.sum([Multinomial(weights=pi[t], K=K).log_likelihood(Xte[t][None,:])
                              for t in xrange(Tpred)])

        return pred_ll

    # TODO: Get initial pred ll
    init_results = (0, None, np.nan, np.nan, compute_pred_ll())

    def resample():
        tic = time.time()
        [model.resample_model() for model in models]
        toc = time.time() - tic


        return toc, None, np.nan, np.nan, compute_pred_ll()


    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] +
            [resample() for _ in progprint_xrange(N_samples, perline=5)])))
    timestamps = np.cumsum(times)
    return Results(lls, test_lls, pred_lls, samples, timestamps)
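The Monte Carlo conversion inside compute_pred_ll (argmax over each predicted Gaussian, histogram the winners into a point on the simplex, then clip and renormalize) can be isolated like this (a sketch using the shapes stated in the comments above):

import numpy as np

def preds_to_pi(preds, K, eps=1e-8):
    # preds: T x K x Npred array of sampled predictions
    T, _, Npred = preds.shape
    inds = np.argmax(preds, axis=1)                 # T x Npred
    pi = np.array([np.bincount(inds[t], minlength=K)
                   for t in range(T)]) / float(Npred)
    pi = np.clip(pi, eps, 1.0)
    return pi / pi.sum(axis=1, keepdims=True)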
Example #13
File: dna_lds.py Project: fivejjs/pgmult
def fit_gaussian_lds_model(Xs, Xtest, D_gauss_lds, N_samples=100):
    print("Fitting Gaussian (Raw) LDS with %d states" % D_gauss_lds)
    model = DefaultLDS(n=D_gauss_lds, p=K)

    Xs_centered = [X - np.mean(X, axis=0)[None,:] + 1e-3*np.random.randn(*X.shape) for X in Xs]
    for X in Xs_centered:
        model.add_data(X)

    # TODO: Get initial pred ll
    init_results = (0, None, np.nan, np.nan, np.nan)


    def resample():
        tic = time.time()
        model.resample_model()
        toc = time.time() - tic

        # Monte Carlo sample to get pi density implied by Gaussian LDS
        Tpred = Xtest.shape[0]
        Npred = 1000

        preds = model.sample_predictions(Xs_centered[0], Tpred, Npred=Npred)

        # Convert predictions to a distribution by finding the
        # largest dimension for each predicted Gaussian.
        # Preds is T x K x Npred, inds is TxNpred
        inds = np.argmax(preds, axis=1)
        pi = np.array([np.bincount(inds[t], minlength=K) for t in xrange(Tpred)]) / float(Npred)
        assert np.allclose(pi.sum(axis=1), 1.0)

        pi = np.clip(pi, 1e-8, 1.0)
        pi /= pi.sum(axis=1)[:,None]

        # Compute the log likelihood under pi
        pred_ll = np.sum([Multinomial(weights=pi[t], K=K).log_likelihood(Xtest[t][None,:])
                          for t in xrange(Tpred)])

        return toc, None, np.nan, \
            np.nan, \
            pred_ll

    n_retries = 0
    max_attempts = 5
    while n_retries < max_attempts:
        try:
            times, samples, lls, test_lls, pred_lls = \
                map(np.array, zip(*([init_results] + [resample() for _ in progprint_xrange(N_samples)])))
            timestamps = np.cumsum(times)
            return Results(lls, test_lls, pred_lls, samples, timestamps)
        except Exception as e:
            print("Caught exception: ", e.message)
            print("Retrying")
            n_retries += 1

    raise Exception("Failed to fit the Raw Gaussian LDS model in %d attempts" % max_attempts)
Example #14
 def meanfield_coordinate_descent(self,tol=1e-1,maxiter=250,progprint=False,**kwargs):
     # NOTE: doesn't re-initialize!
     scores = []
     step_iterator = xrange(maxiter) if not progprint else progprint_xrange(maxiter)
     for itr in step_iterator:
         scores.append(self.meanfield_coordinate_descent_step(**kwargs))
         if scores[-1] is not None and len(scores) > 1:
             if np.abs(scores[-1]-scores[-2]) < tol:
                 return scores
     print('WARNING: meanfield_coordinate_descent hit maxiter of %d' % maxiter)
     return scores
Example #15
def fit_discrete_time_model_gibbs(S_dt, N_samples=100):

    # Now fit a DT model
    dt_model_test = pyhawkes.models.\
        DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, dt_max=dt_max, B=B,
                                                   network_hypers=network_hypers)
    dt_model_test.add_data(S_dt)

    tic = time.time()
    for iter in progprint_xrange(N_samples, perline=25):
        dt_model_test.resample_model()
    toc = time.time()

    return (toc-tic) / N_samples
Example #16
def fit_continuous_time_model_gibbs(S_ct, C_ct, N_samples=100):

    # Now fit a CT model
    ct_model = pyhawkes.models.\
        ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max,
                                         network_hypers=network_hypers)
    ct_model.add_data(S_ct, C_ct, T)

    tic = time.time()
    for iter in progprint_xrange(N_samples, perline=25):
        ct_model.resample_model()
    toc = time.time()

    return (toc-tic) / N_samples
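Both timing helpers return average wall-clock seconds per Gibbs sweep, so they can be compared directly. A hypothetical usage, assuming the spike data (S_dt, S_ct, C_ct) and the globals (K, dt, dt_max, B, T, network_hypers) are defined as in the surrounding examples:

dt_sec = fit_discrete_time_model_gibbs(S_dt, N_samples=100)
ct_sec = fit_continuous_time_model_gibbs(S_ct, C_ct, N_samples=100)
print("DT: %.3f s/iter  CT: %.3f s/iter" % (dt_sec, ct_sec))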
Example #17
def fit_lds_model(Xs, Xtest, D, N_samples=100):
    Nx = len(Xs)
    assert len(Xtest) == Nx

    mus = [X.sum(0) + 0.1 for X in Xs]
    mus = [mu / mu.sum() for mu in mus]
    # mus = [np.ones(K)/float(K) for _ in Xs]

    models = [
        MultinomialLDS(K,
                       D,
                       init_dynamics_distn=GaussianFixed(mu=np.zeros(D),
                                                         sigma=1 * np.eye(D)),
                       dynamics_distn=AutoRegression(nu_0=D + 1,
                                                     S_0=1 * np.eye(D),
                                                     M_0=np.zeros((D, D)),
                                                     K_0=1 * np.eye(D)),
                       sigma_C=1.,
                       mu_pi=mus[i]) for i in range(Nx)
    ]

    for X, model in zip(Xs, models):
        model.add_data(X)

    [model.resample_parameters() for model in models]

    def compute_pred_ll():
        pred_ll = 0
        for Xt, model in zip(Xtest, models):
            pred_ll += model.predictive_log_likelihood(Xt, M=1)[0]

        return pred_ll

    init_results = (0, models, np.nan, np.nan, compute_pred_ll())

    def resample():
        tic = time.time()
        [model.resample_model() for model in models]
        toc = time.time() - tic

        return toc, None, np.nan, np.nan, compute_pred_ll()

    times, samples, lls, test_lls, pred_lls = \
        list(map(np.array, list(zip(*([init_results] +
            [resample() for _ in progprint_xrange(N_samples, perline=5)])))))

    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
Example #18
def fit_factor_analysis(y, mask=None, N_iters=100):
    print("Fitting Factor Analysis")
    model = FactorAnalysis(D_obs, D_latent)

    if mask is None:
        mask = np.ones_like(y, dtype=bool)

    # Center the data
    b = y.mean(0)
    data = model.add_data(y - b, mask=mask)
    for _ in progprint_xrange(N_iters):
        model.resample_model()

    C_init = np.column_stack((model.W, b))
    return data.Z, C_init
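fit_factor_analysis fits on centered data and then reassembles an affine emission matrix: with W of shape (D_obs, D_latent) and the data mean b of shape (D_obs,), C_init = np.column_stack((W, b)) maps the augmented latent [z; 1] back to y. A shape check (a sketch; the dimensions here are assumptions):

import numpy as np

D_obs, D_latent = 10, 3
W, b = np.random.randn(D_obs, D_latent), np.random.randn(D_obs)
C_init = np.column_stack((W, b))        # (D_obs, D_latent + 1)
z = np.random.randn(D_latent)
y = C_init.dot(np.append(z, 1.0))       # equals W @ z + b
assert np.allclose(y, W.dot(z) + b)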
Example #19
def fit_ln_lds_model(Xs, Xtest, D, N_samples=100):
    """
    Fit a logistic normal LDS model with pMCMC
    """
    Nx = len(Xs)
    assert len(Xtest) == Nx

    print("Fitting Logistic Normal LDS with %d states" % D)
    mus = [X.sum(0) + 0.1 for X in Xs]
    mus = [np.log(mu/mu.sum()) for mu in mus]

    models = [LogisticNormalMultinomialLDS(
                 init_dynamics_distn=GaussianFixed(mu=np.zeros(D), sigma=1*np.eye(D)),
                 dynamics_distn=AutoRegression(nu_0=D+1,S_0=D*np.eye(D),M_0=np.zeros((D,D)),K_0=D*np.eye(D)),
                 emission_distn=Regression(nu_0=K+1,S_0=K*np.eye(K),M_0=np.zeros((K,D)),K_0=K*np.eye(D)),
                 sigma_C=1.0, mu=mu) \
              for mu in mus]

    for model in models:
        model.A = 0.5*np.eye(D)
        model.sigma_states = np.eye(D)
        model.C = 1.0*np.random.randn(K,D)
        model.sigma_obs = 0.1*np.eye(K)

    for X, model in zip(Xs, models):
        model.add_data(X)

    def compute_pred_ll():
        pred_ll = 0
        for Xte, model in zip(Xtest, models):
            pred_ll += model.predictive_log_likelihood(Xte, Npred=1)[0]
        return pred_ll

    init_results = (0, None, np.nan, np.nan, compute_pred_ll())

    def resample():
        tic = time.time()
        [model.resample_model() for model in models]
        toc = time.time() - tic

        return toc, None, np.nan, np.nan, compute_pred_ll()

    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] +
            [resample() for _ in progprint_xrange(N_samples, perline=5)])))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
Example #20
File: ap_lds.py Project: yinsenm/pgmult
def fit_lds_model_with_pmcmc(Xs, Xtest, D, N_samples=100):
    """
    Fit a logistic normal LDS model with pMCMC
    """
    print("Fitting SBM-LDS with %d states using pMCMC" % D)
    model = ParticleSBMultinomialLDS(
        init_dynamics_distn=GaussianFixed(mu=np.zeros(D), sigma=1 * np.eye(D)),
        dynamics_distn=AutoRegression(nu_0=D + 1,
                                      S_0=D * np.eye(D),
                                      M_0=np.zeros((D, D)),
                                      K_0=D * np.eye(D)),
        emission_distn=Regression(nu_0=K + 1,
                                  S_0=K * np.eye(K),
                                  M_0=np.zeros((K, D)),
                                  K_0=K * np.eye(D)),
        mu=pi_to_psi(np.ones(K) / K))

    model.A = 0.5 * np.eye(D)
    model.sigma_states = np.eye(D)
    model.C = np.random.randn(K - 1, D)
    model.sigma_obs = 0.1 * np.eye(K)

    for X in Xs:
        model.add_data(X)

    compute_pred_ll = lambda: sum([
        model.predictive_log_likelihood(Xt, data_index=i, Npred=10)[0]
        for i, Xt in enumerate(Xtest)
    ])

    init_results = (0, None, model.log_likelihood(), np.nan, compute_pred_ll())

    def resample():
        tic = time.time()
        model.resample_model()
        toc = time.time() - tic

        # pred_ll = model.predictive_log_likelihood(Xtest, Npred=1000)

        return toc, None, model.log_likelihood(), \
            np.nan, \
            compute_pred_ll()

    times, samples, lls, test_lls, pred_lls = \
        list(map(np.array, list(zip(*([init_results] + [resample() for _ in progprint_xrange(N_samples)])))))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
Example #21
File: ap_lds.py Project: yinsenm/pgmult
def fit_lds_model(Xs, Xtest, D, N_samples=100):
    Nx = len(Xs)
    assert len(Xtest) == Nx

    model = MultinomialLDS(K,
                           D,
                           init_dynamics_distn=GaussianFixed(mu=np.zeros(D),
                                                             sigma=1 *
                                                             np.eye(D)),
                           dynamics_distn=AutoRegression(nu_0=D + 1,
                                                         S_0=1 * np.eye(D),
                                                         M_0=np.zeros((D, D)),
                                                         K_0=1 * np.eye(D)),
                           sigma_C=1.)

    for X in Xs:
        model.add_data(X)

    model.resample_parameters()

    compute_pred_ll = lambda: sum([
        model.predictive_log_likelihood(Xt, data_index=i, M=10)[0]
        for i, Xt in enumerate(Xtest)
    ])

    init_results = (
        0,
        None,
        model.log_likelihood(),
        # model.heldout_log_likelihood(Xtest, M=1),
        np.nan,
        compute_pred_ll())

    def resample():
        tic = time.time()
        model.resample_model()
        toc = time.time() - tic

        return toc, None, model.log_likelihood(), \
            np.nan,\
            compute_pred_ll()

    times, samples, lls, test_lls, pred_lls = \
        list(map(np.array, list(zip(*([init_results] +
            [resample() for _ in progprint_xrange(N_samples, perline=5)])))))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
Example #22
def fit_ln_lds_model(Xs, Xtest, D, N_samples=100):
    """
    Fit a logistic normal LDS model with pMCMC
    """
    Nx = len(Xs)
    assert len(Xtest) == Nx

    print("Fitting Logistic Normal LDS with %d states" % D)
    mus = [X.sum(0) + 0.1 for X in Xs]
    mus = [np.log(mu / mu.sum()) for mu in mus]

    models = [LogisticNormalMultinomialLDS(
                 init_dynamics_distn=GaussianFixed(mu=np.zeros(D), sigma=1*np.eye(D)),
                 dynamics_distn=AutoRegression(nu_0=D+1,S_0=D*np.eye(D),M_0=np.zeros((D,D)),K_0=D*np.eye(D)),
                 emission_distn=Regression(nu_0=K+1,S_0=K*np.eye(K),M_0=np.zeros((K,D)),K_0=K*np.eye(D)),
                 sigma_C=1.0, mu=mu) \
              for mu in mus]

    for model in models:
        model.A = 0.5 * np.eye(D)
        model.sigma_states = np.eye(D)
        model.C = 1.0 * np.random.randn(K, D)
        model.sigma_obs = 0.1 * np.eye(K)

    for X, model in zip(Xs, models):
        model.add_data(X)

    def compute_pred_ll():
        pred_ll = 0
        for Xte, model in zip(Xtest, models):
            pred_ll += model.predictive_log_likelihood(Xte, Npred=1)[0]
        return pred_ll

    init_results = (0, None, np.nan, np.nan, compute_pred_ll())

    def resample():
        tic = time.time()
        [model.resample_model() for model in models]
        toc = time.time() - tic

        return toc, None, np.nan, np.nan, compute_pred_ll()

    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] +
            [resample() for _ in progprint_xrange(N_samples, perline=5)])))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
Example #23
def fit_gaussian_lds_model(Xs, N_samples=100):
    testmodel = DefaultLDS(n=D,p=K)

    for X in Xs:
        testmodel.add_data(X)

    samples = []
    lls = []
    for smpl in progprint_xrange(N_samples):
        testmodel.resample_model()

        samples.append(testmodel.copy_sample())
        lls.append(testmodel.log_likelihood())

    lls = np.array(lls)
    return lls
Example #24
def fit_gaussian_lds_model(Xs, N_samples=100):
    testmodel = DefaultLDS(n=D, p=K)

    for X in Xs:
        testmodel.add_data(X)

    samples = []
    lls = []
    for smpl in progprint_xrange(N_samples):
        testmodel.resample_model()

        samples.append(testmodel.copy_sample())
        lls.append(testmodel.log_likelihood())

    lls = np.array(lls)
    return lls
Example #25
def fit_lds_model(Xs, Xtest, N_samples=100):
    model = MultinomialLDS(K,
                           D,
                           init_dynamics_distn=Gaussian(mu_0=np.zeros(D),
                                                        sigma_0=np.eye(D),
                                                        kappa_0=1.0,
                                                        nu_0=D + 1.0),
                           dynamics_distn=AutoRegression(nu_0=D + 1,
                                                         S_0=np.eye(D),
                                                         M_0=np.zeros((D, D)),
                                                         K_0=np.eye(D)),
                           sigma_C=1)

    for X in Xs:
        model.add_data(X)
    data = model.data_list[0]

    samples = []
    lls = []
    test_lls = []
    mc_test_lls = []
    pis = []
    psis = []
    zs = []
    timestamps = [time.time()]
    for smpl in progprint_xrange(N_samples):
        model.resample_model()
        timestamps.append(time.time())

        samples.append(model.copy_sample())
        # TODO: Use log_likelihood() to marginalize over z
        lls.append(model.log_likelihood())
        # test_lls.append(model.heldout_log_likelihood(Xtest, M=50)[0])
        mc_test_lls.append(model._mc_heldout_log_likelihood(Xtest, M=1)[0])
        pis.append(model.pi(data))
        psis.append(model.psi(data))
        zs.append(data["states"].stateseq)

    lls = np.array(lls)
    test_lls = np.array(test_lls)
    pis = np.array(pis)
    psis = np.array(psis)
    zs = np.array(zs)
    timestamps = np.array(timestamps)
    timestamps -= timestamps[0]
    return model, lls, test_lls, mc_test_lls, pis, psis, zs, timestamps
Example #26
 def _EM_fit(self,method,tol=1e-1,maxiter=100,progprint=True):
     # NOTE: doesn't re-initialize!
     likes = []
     step_iterator = xrange(maxiter) if not progprint else progprint_xrange(maxiter)
     for itr in step_iterator:
         method()
         likes.append(self.log_likelihood())
         if len(likes) > 1:
             if likes[-1]-likes[-2] < tol:
                 return likes
             elif likes[-1] < likes[-2]:
                 # probably oscillation, do one more
                 method()
                 likes.append(self.log_likelihood())
                 return likes
      print('WARNING: EM_fit reached maxiter of %d' % maxiter)
     return likes
Example #27
def fit_lds_model_with_pmcmc(Xs, Xtest, D, N_samples=100):
    """
    Fit a logistic normal LDS model with pMCMC
    """
    Nx = len(Xs)
    assert len(Xtest) == Nx

    print("Fitting SBM-LDS with %d states using pMCMC" % D)
    models = [ParticleSBMultinomialLDS(
                init_dynamics_distn=GaussianFixed(mu=np.zeros(D), sigma=1*np.eye(D)),
                dynamics_distn=AutoRegression(nu_0=D+1,S_0=D*np.eye(D),M_0=np.zeros((D,D)),K_0=D*np.eye(D)),
                emission_distn=Regression(nu_0=K+1,S_0=K*np.eye(K),M_0=np.zeros((K,D)),K_0=K*np.eye(D)),
                mu=pi_to_psi(np.ones(K)/K),
                sigma_C=1.0)
             for _ in range(Nx)]

    for model in models:
        model.A = 0.5*np.eye(D)
        model.sigma_states = np.eye(D)
        model.C = np.random.randn(K-1,D)
        model.sigma_obs = 0.1*np.eye(K)

    for X, model in zip(Xs, models):
        model.add_data(X)

    def compute_pred_ll():
        pred_ll = 0
        for Xte, model in zip(Xtest, models):
            pred_ll += model.predictive_log_likelihood(Xte, Npred=100)[0]
        return pred_ll

    init_results = (0, None, np.nan, np.nan, compute_pred_ll())

    def resample():
        tic = time.time()
        [model.resample_model() for model in models]
        toc = time.time() - tic

        return toc, None, np.nan, np.nan, compute_pred_ll()

    times, samples, lls, test_lls, pred_lls = \
        list(map(np.array, list(zip(*([init_results] +
            [resample() for _ in progprint_xrange(N_samples, perline=5)])))))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
Example #28
 def _EM_fit(self,method,tol=1e-1,maxiter=100,progprint=False):
     # NOTE: doesn't re-initialize!
     likes = []
     step_iterator = xrange(maxiter) if not progprint else progprint_xrange(maxiter)
     for itr in step_iterator:
         method()
         likes.append(self.log_likelihood())
         if len(likes) > 1:
             if likes[-1]-likes[-2] < tol:
                 return likes
             elif likes[-1] < likes[-2]:
                 # probably oscillation, do one more
                 method()
                 likes.append(self.log_likelihood())
                 return likes
     print('WARNING: EM_fit reached maxiter of %d' % maxiter)
     return likes
Example #29
 def meanfield_coordinate_descent(self,
                                  tol=1e-1,
                                  maxiter=250,
                                  progprint=False,
                                  **kwargs):
     # NOTE: doesn't re-initialize!
     scores = []
     step_iterator = xrange(maxiter) if not progprint else progprint_xrange(
         maxiter)
     for itr in step_iterator:
         scores.append(self.meanfield_coordinate_descent_step(**kwargs))
         if scores[-1] is not None and len(scores) > 1:
             if np.abs(scores[-1] - scores[-2]) < tol:
                 return scores
     print('WARNING: meanfield_coordinate_descent hit maxiter of %d' %
           maxiter)
     return scores
Example #30
def em_example(true_model, X, Z_true, mask):
    # Fit a test model
    model = FactorAnalysis(
        D_obs, D_latent,
        # W=true_model.W, sigmasq=true_model.sigmasq
        )
    inf_data = model.add_data(X, mask=mask)
    model.set_empirical_mean()

    lps = []
    angles = []
    N_iters = 100
    for _ in progprint_xrange(N_iters):
        model.EM_step()
        lps.append(model.log_likelihood())
        angles.append(principal_angle(true_model.W, model.W))

    plot_results(lps, angles, Z_true, inf_data.E_Z)
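principal_angle is called throughout these examples but never shown. A plausible stand-in (an assumption, not the library's actual code) measures the angle between the column spans of the true and inferred loading matrices via orthonormalization and an SVD:

import numpy as np

def principal_angle(A, B):
    # Largest principal angle between span(A) and span(B); a guess at
    # the helper used in these examples, not its actual source.
    qa, _ = np.linalg.qr(A)
    qb, _ = np.linalg.qr(B)
    s = np.linalg.svd(qa.T.dot(qb), compute_uv=False)
    return np.arccos(np.clip(s.min(), -1.0, 1.0))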
Example #31
def fit_ln_lds_model(Xs, Xtest, D, N_samples=100):
    """
    Fit a logistic normal LDS model with pMCMC
    """
    print("Fitting Logistic Normal LDS with %d states" % D)
    model = LogisticNormalMultinomialLDS(
        init_dynamics_distn=GaussianFixed(mu=np.zeros(D), sigma=1 * np.eye(D)),
        dynamics_distn=AutoRegression(nu_0=D + 1,
                                      S_0=D * np.eye(D),
                                      M_0=np.zeros((D, D)),
                                      K_0=D * np.eye(D)),
        emission_distn=Regression(nu_0=K + 1,
                                  S_0=K * np.eye(K),
                                  M_0=np.zeros((K, D)),
                                  K_0=K * np.eye(D)),
        sigma_C=0.1)

    model.A = 0.5 * np.eye(D)
    model.sigma_states = np.eye(D)
    model.C = 0.33 * np.random.randn(K, D)
    model.sigma_obs = 0.1 * np.eye(K)

    for X in Xs:
        model.add_data(X)

    init_results = (0, None, model.log_likelihood(), np.nan,
                    model.predictive_log_likelihood(Xtest, Npred=1000))

    def resample():
        tic = time.time()
        model.resample_model()
        toc = time.time() - tic

        pred_ll = model.predictive_log_likelihood(Xtest, Npred=1000)

        return toc, None, model.log_likelihood(), \
            np.nan, \
            pred_ll

    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] + [resample() for _ in progprint_xrange(N_samples)])))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
Example #32
def gibbs_example(true_model, X, Z_true, mask):
    # Fit a test model
    model = FactorAnalysis(
        D_obs,
        D_latent,
        # W=true_model.W, sigmasq=true_model.sigmasq
    )
    inf_data = model.add_data(X, mask=mask)
    model.set_empirical_mean()

    lps = []
    angles = []
    N_iters = 100
    for _ in progprint_xrange(N_iters):
        model.resample_model()
        lps.append(model.log_likelihood())
        angles.append(principal_angle(true_model.W, model.W))

    plot_results(lps, angles, Z_true, inf_data.Z)
Example #33
def em_example(true_model, true_data):
    X, mask = true_data.X, true_data.mask

    # Fit a test model
    model = FactorAnalysis(
        D_obs, D_latent,
        # W=true_model.W, sigmasq=true_model.sigmasq
        )
    inf_data = model.add_data(X, mask=mask)

    lps = []
    angles = []
    N_iters = 100
    for _ in progprint_xrange(N_iters):
        model.EM_step()
        lps.append(model.log_likelihood())
        angles.append(principal_angle(true_model.W, model.W))

    plot_results(lps, angles, true_data.Z, inf_data.E_Z)
Example #34
def meanfield_example(true_model, X, Z_true, mask):
    # Fit a test model
    model = FactorAnalysis(
        D_obs, D_latent,
        # W=true_model.W, sigmasq=true_model.sigmasq
        )
    inf_data = model.add_data(X, mask=mask)
    model.set_empirical_mean()

    lps = []
    angles = []
    N_iters = 100
    for _ in progprint_xrange(N_iters):
        model.meanfield_coordinate_descent_step()
        lps.append(model.expected_log_likelihood())
        E_W, _, _, _ = model.regression.mf_expectations
        angles.append(principal_angle(true_model.W, E_W))

    plot_results(lps, angles, Z_true, inf_data.Z)
Example #35
def fit_lds_model(Xs, Xtest, D, N_samples=100):
    Nx = len(Xs)
    assert len(Xtest) == Nx

    mus = [X.sum(0) + 0.1 for X in Xs]
    mus = [mu/mu.sum() for mu in mus]
    # mus = [np.ones(K)/float(K) for _ in Xs]

    models = [MultinomialLDS(K, D,
        init_dynamics_distn=GaussianFixed(mu=np.zeros(D), sigma=1*np.eye(D)),
        dynamics_distn=AutoRegression(nu_0=D+1,S_0=1*np.eye(D),M_0=np.zeros((D,D)),K_0=1*np.eye(D)),
        sigma_C=1., mu_pi=mus[i]) for i in xrange(Nx)]

    for X, model in zip(Xs, models):
        model.add_data(X)


    [model.resample_parameters() for model in models]


    def compute_pred_ll():
        pred_ll = 0
        for Xt, model in zip(Xtest, models):
            pred_ll += model.predictive_log_likelihood(Xt, M=1)[0]

        return pred_ll

    init_results = (0, models, np.nan, np.nan, compute_pred_ll())

    def resample():
        tic = time.time()
        [model.resample_model() for model in models]
        toc = time.time() - tic

        return toc, None, np.nan,  np.nan, compute_pred_ll()

    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] +
            [resample() for _ in progprint_xrange(N_samples, perline=5)])))

    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
Example #36
def gibbs_example(true_model, true_data):
    X, mask = true_data.X, true_data.mask

    # Fit a test model
    model = FactorAnalysis(
        D_obs, D_latent,
        # W=true_model.W, sigmasq=true_model.sigmasq
        )
    inf_data = model.add_data(X, mask=mask)

    lps = []
    angles = []
    N_iters = 100
    for _ in progprint_xrange(N_iters):
        model.resample_model()
        lps.append(model.log_likelihood())
        angles.append(principal_angle(true_model.W, model.W))

    plot_results(lps, angles, true_data.Z, inf_data.Z)
Example #37
def meanfield_example(true_model, true_data):
    X, mask = true_data.X, true_data.mask

    # Fit a test model
    model = FactorAnalysis(
        D_obs, D_latent,
        # W=true_model.W, sigmasq=true_model.sigmasq
        )
    inf_data = model.add_data(X, mask=mask)

    lps = []
    angles = []
    N_iters = 100
    for _ in progprint_xrange(N_iters):
        model.meanfield_coordinate_descent_step()
        lps.append(model.expected_log_likelihood())
        E_W, _, _, _ = model.regression.mf_expectations
        angles.append(principal_angle(true_model.W, E_W))

    plot_results(lps, angles, true_data.Z, inf_data.Z)
Example #38
File: ap_lds.py Project: fivejjs/pgmult
def fit_lds_model_with_pmcmc(Xs, Xtest, D, N_samples=100):
    """
    Fit a logistic normal LDS model with pMCMC
    """
    print("Fitting SBM-LDS with %d states using pMCMC" % D)
    model = ParticleSBMultinomialLDS(
        init_dynamics_distn=GaussianFixed(mu=np.zeros(D), sigma=1*np.eye(D)),
        dynamics_distn=AutoRegression(nu_0=D+1,S_0=D*np.eye(D),M_0=np.zeros((D,D)),K_0=D*np.eye(D)),
        emission_distn=Regression(nu_0=K+1,S_0=K*np.eye(K),M_0=np.zeros((K,D)),K_0=K*np.eye(D)),
        mu=pi_to_psi(np.ones(K)/K))

    model.A = 0.5*np.eye(D)
    model.sigma_states = np.eye(D)
    model.C = np.random.randn(K-1,D)
    model.sigma_obs = 0.1*np.eye(K)

    for X in Xs:
        model.add_data(X)

    compute_pred_ll = lambda: sum([model.predictive_log_likelihood(Xt, data_index=i, Npred=10)[0]
                                   for i,Xt in enumerate(Xtest)])

    init_results = (0, None, model.log_likelihood(),
                    np.nan, compute_pred_ll())

    def resample():
        tic = time.time()
        model.resample_model()
        toc = time.time() - tic

        # pred_ll = model.predictive_log_likelihood(Xtest, Npred=1000)

        return toc, None, model.log_likelihood(), \
            np.nan, \
            compute_pred_ll()

    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] + [resample() for _ in progprint_xrange(N_samples)])))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
Example #39
File: dtm.py Project: fivejjs/pgmult
def fit_sbdtm_gibbs(train_data, test_data, timestamps, K, Niter, alpha_theta):
    def evaluate(model):
        ll, pll = model.log_likelihood(), model.log_likelihood(test_data)
        # print '{} '.format(ll),
        return ll, pll

    def sample(model):
        tic = time.time()
        model.resample()
        timestep = time.time() - tic
        return evaluate(model), timestep

    print "Running sbdtm gibbs..."
    model = StickbreakingDynamicTopicsLDA(train_data, timestamps, K, alpha_theta)
    init_val = evaluate(model)
    vals, timesteps = zip(*[sample(model) for _ in progprint_xrange(Niter)])

    lls, plls = zip(*((init_val,) + vals))
    times = np.cumsum((0,) + timesteps)

    return Results(lls, plls, model.copy_sample(), times)
Example #40
def fit_lds_model(Xs, Xtest, N_samples=100):
    model = MultinomialLDS(K, D,
        init_dynamics_distn=Gaussian(mu_0=np.zeros(D), sigma_0=np.eye(D), kappa_0=1.0, nu_0=D+1.0),
        dynamics_distn=AutoRegression(nu_0=D+1,S_0=np.eye(D),M_0=np.zeros((D,D)),K_0=np.eye(D)),
        sigma_C=1
        )

    for X in Xs:
        model.add_data(X)
    data = model.data_list[0]

    samples = []
    lls = []
    test_lls = []
    mc_test_lls = []
    pis = []
    psis = []
    zs = []
    timestamps = [time.time()]
    for smpl in progprint_xrange(N_samples):
        model.resample_model()
        timestamps.append(time.time())

        samples.append(model.copy_sample())
        # TODO: Use log_likelihood() to marginalize over z
        lls.append(model.log_likelihood())
        # test_lls.append(model.heldout_log_likelihood(Xtest, M=50)[0])
        mc_test_lls.append(model._mc_heldout_log_likelihood(Xtest, M=1)[0])
        pis.append(model.pi(data))
        psis.append(model.psi(data))
        zs.append(data["states"].stateseq)

    lls = np.array(lls)
    test_lls = np.array(test_lls)
    pis = np.array(pis)
    psis = np.array(psis)
    zs = np.array(zs)
    timestamps = np.array(timestamps)
    timestamps -= timestamps[0]
    return model, lls, test_lls, mc_test_lls, pis, psis, zs, timestamps
Example #41
def fit_arhmm(x, affine=True):
    print("Fitting Sticky ARHMM")
    dynamics_hypparams = \
        dict(nu_0=D_latent + 2,
             S_0=np.eye(D_latent),
             M_0=np.hstack((np.eye(D_latent), np.zeros((D_latent, int(affine))))),
             K_0=np.eye(D_latent + affine),
             affine=affine)
    dynamics_hypparams = get_empirical_ar_params([x], dynamics_hypparams)

    dynamics_distns = [
        AutoRegression(A=np.column_stack(
            (0.99 * np.eye(D_latent), np.zeros((D_latent, int(affine))))),
                       sigma=np.eye(D_latent),
                       **dynamics_hypparams) for _ in range(K)
    ]

    init_distn = Gaussian(nu_0=D_latent + 2,
                          sigma_0=np.eye(D_latent),
                          mu_0=np.zeros(D_latent),
                          kappa_0=1.0)

    arhmm = ARWeakLimitStickyHDPHMM(init_state_distn='uniform',
                                    init_emission_distn=init_distn,
                                    obs_distns=dynamics_distns,
                                    alpha=3.0,
                                    kappa=10.0,
                                    gamma=3.0)

    arhmm.add_data(x)

    lps = []
    for _ in progprint_xrange(1000):
        arhmm.resample_model()
        lps.append(arhmm.log_likelihood())

    z_init = arhmm.states_list[0].stateseq
    z_init = np.concatenate(([0], z_init))

    return arhmm, z_init
Example #42
def fit_vb(name, model, test_data, N_iter=1000, init_state_seq=None):
    def evaluate(model):
        ll = model.log_likelihood()
        pll = model.log_likelihood(test_data)
        N_used = len(model.used_states)
        trans = model.trans_distn
        alpha = trans.alpha
        gamma = trans.gamma if hasattr(trans, "gamma") else None
        rates = model.rates.copy()
        obs_hypers = model.obs_hypers
        # print 'N_states: {}, \tPLL:{}\n'.format(len(model.used_states), pll),
        return ll, pll, N_used, alpha, gamma, rates, obs_hypers

    def sample(model):
        tic = time.time()
        model.meanfield_coordinate_descent_step()
        timestep = time.time() - tic

        # Resample from mean field posterior
        model._resample_from_mf()

        return evaluate(model), timestep

    # Initialize with given state seq
    if init_state_seq is not None:
        model.states_list[0].stateseq = init_state_seq
        for _ in xrange(100):
            model.resample_obs_distns()

    init_val = evaluate(model)
    vals, timesteps = zip(*[sample(model) for _ in progprint_xrange(N_iter)])

    lls, plls, N_used, alphas, gammas, rates, obs_hypers = \
        zip(*((init_val,) + vals))
    timestamps = np.cumsum((0.,) + timesteps)

    return Results(name, lls, plls, N_used, alphas, gammas,
                   rates, obs_hypers,
                   model.copy_sample(), timestamps)
Example #43
File: dna_lds.py Project: fivejjs/pgmult
def fit_ln_lds_model(Xs, Xtest, D, N_samples=100):
    """
    Fit a logistic normal LDS model with pMCMC
    """
    print("Fitting Logistic Normal LDS with %d states" % D)
    model = LogisticNormalMultinomialLDS(
        init_dynamics_distn=GaussianFixed(mu=np.zeros(D), sigma=1*np.eye(D)),
        dynamics_distn=AutoRegression(nu_0=D+1,S_0=D*np.eye(D),M_0=np.zeros((D,D)),K_0=D*np.eye(D)),
        emission_distn=Regression(nu_0=K+1,S_0=K*np.eye(K),M_0=np.zeros((K,D)),K_0=K*np.eye(D)),
        sigma_C=0.1)

    model.A = 0.5*np.eye(D)
    model.sigma_states = np.eye(D)
    model.C = 0.33 * np.random.randn(K,D)
    model.sigma_obs = 0.1*np.eye(K)

    for X in Xs:
        model.add_data(X)

    init_results = (0, None, model.log_likelihood(),
                    np.nan, model.predictive_log_likelihood(Xtest, Npred=1000))

    def resample():
        tic = time.time()
        model.resample_model()
        toc = time.time() - tic

        pred_ll = model.predictive_log_likelihood(Xtest, Npred=1000)

        return toc, None, model.log_likelihood(), \
            np.nan, \
            pred_ll

    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] + [resample() for _ in progprint_xrange(N_samples)])))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
Example #44
File: ap_lds.py Project: fivejjs/pgmult
def fit_lds_model(Xs, Xtest, D, N_samples=100):
    Nx = len(Xs)
    assert len(Xtest) == Nx

    model = MultinomialLDS(K, D,
        init_dynamics_distn=GaussianFixed(mu=np.zeros(D), sigma=1*np.eye(D)),
        dynamics_distn=AutoRegression(nu_0=D+1,S_0=1*np.eye(D),M_0=np.zeros((D,D)),K_0=1*np.eye(D)),
        sigma_C=1.
        )

    for X in Xs:
        model.add_data(X)

    model.resample_parameters()

    compute_pred_ll = lambda: sum([model.predictive_log_likelihood(Xt, data_index=i, M=10)[0]
                                   for i,Xt in enumerate(Xtest)])

    init_results = (0, None, model.log_likelihood(),
                    # model.heldout_log_likelihood(Xtest, M=1),
                    np.nan,
                    compute_pred_ll())

    def resample():
        tic = time.time()
        model.resample_model()
        toc = time.time() - tic

        return toc, None, model.log_likelihood(), \
            np.nan,\
            compute_pred_ll()

    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] +
            [resample() for _ in progprint_xrange(N_samples, perline=5)])))
    timestamps = np.cumsum(times)

    return Results(lls, test_lls, pred_lls, samples, timestamps)
Example #45
    def max_likelihood(self, data, weights=None, stats=None):
        """
        Maximize the likelihood for given data
        :param data:
        :param weights:
        :param stats:
        :return:
        """
        if isinstance(data, list):
            x = np.vstack([d[0] for d in data])
            y = np.vstack([d[1] for d in data])
        elif isinstance(data, tuple):
            assert len(data) == 2
            x, y = data
        elif isinstance(data, np.ndarray):
            x, y = data[:, :self.D_in], data[:, self.D_in:]
        else:
            raise Exception("Invalid data type")

        from sklearn.linear_model import LogisticRegression
        for n in progprint_xrange(self.D_out):
            lr = LogisticRegression(fit_intercept=False)
            lr.fit(x, y[:, n])
            self.A[n] = lr.coef_
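max_likelihood above fits each of the D_out outputs with an independent logistic regression on the shared inputs. A toy usage sketch with synthetic data (all names and shapes here are hypothetical):

import numpy as np
from sklearn.linear_model import LogisticRegression

N, D_in, D_out = 500, 3, 2
x = np.random.randn(N, D_in)
w_true = np.random.randn(D_out, D_in)
y = (x.dot(w_true.T) > 0).astype(int)   # D_out binary targets

A = np.zeros((D_out, D_in))
for n in range(D_out):
    lr = LogisticRegression(fit_intercept=False)
    lr.fit(x, y[:, n])
    A[n] = lr.coef_                     # recovered weights, row n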
Example #46
def fit_sbdtm_gibbs(train_data, test_data, timestamps, K, Niter, alpha_theta):
    def evaluate(model):
        ll, pll = \
            model.log_likelihood(), \
            model.log_likelihood(test_data)
        # print '{} '.format(ll),
        return ll, pll

    def sample(model):
        tic = time.time()
        model.resample()
        timestep = time.time() - tic
        return evaluate(model), timestep

    print('Running sbdtm gibbs...')
    model = StickbreakingDynamicTopicsLDA(train_data, timestamps, K,
                                          alpha_theta)
    init_val = evaluate(model)
    vals, timesteps = zip(*[sample(model) for _ in progprint_xrange(Niter)])

    lls, plls = zip(*((init_val, ) + vals))
    times = np.cumsum((0, ) + timesteps)

    return Results(lls, plls, model.copy_sample(), times)
Example #47
del priormodel

plt.figure()
plt.plot(data[:, 0], data[:, 1], 'kx')
plt.title('data')

min_num_components, max_num_components = (1, 12)
num_tries_each = 5

### search over models using BIC as a model selection criterion

BICs = []
examplemodels = []
for idx, num_components in enumerate(
        progprint_xrange(min_num_components, max_num_components + 1)):
    theseBICs = []
    for i in xrange(num_tries_each):
        fitmodel = models.Mixture(
            # big alpha_0 for random-initialization Gibbs sampling:
            # large values mean all components get used
            alpha_0=10000,
            components=[distributions.Gaussian(**obs_hypparams)
                        for itr in range(num_components)])

        fitmodel.add_data(data)

        # use Gibbs sampling for initialization
        for itr in xrange(100):
            fitmodel.resample_model()
Example #48
        bkgd_hypers=true_model.bkgd_hypers,
        impulse_hypers=true_model.impulse_hypers,
        weight_hypers=true_model.weight_hypers)
    test_model.add_data(S)

    # Initialize with the standard model parameters
    if init_model is not None:
        test_model.initialize_with_standard_model(init_model)

    ###########################################################
    # Fit the test model with Gibbs sampling
    ###########################################################
    N_samples = 500
    samples = []
    lps = []
    for itr in progprint_xrange(N_samples):
        lps.append(test_model.log_probability())
        samples.append(test_model.copy_sample())
        test_model.resample_model()

    ###########################################################
    # Analyze the samples
    ###########################################################
    N_samples = len(samples)
    A_samples = np.array([s.weight_model.A for s in samples])
    W_samples = np.array([s.weight_model.W for s in samples])
    g_samples = np.array([s.impulse_model.g for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
    lps = np.array(lps)

    offset = N_samples // 2
Example #49
def fit_ct_network_hawkes_gibbs(S,
                                S_test,
                                dt,
                                dt_max,
                                output_path,
                                model_args={},
                                standard_model=None,
                                N_samples=100,
                                time_limit=8 * 60 * 60):

    K = S.shape[1]
    S_ct, C_ct, T = convert_discrete_to_continuous(S, dt)
    S_test_ct, C_test_ct, T_test = convert_discrete_to_continuous(S_test, dt)

    # Check for existing Gibbs results
    if os.path.exists(output_path):
        with gzip.open(output_path, 'r') as f:
            print("Loading Gibbs results from ", output_path)
            results = pickle.load(f)
    else:
        print(
            "Fitting the data with a continuous time network Hawkes model using Gibbs sampling"
        )

        test_model = \
            ContinuousTimeNetworkHawkesModel(K, dt_max=dt_max, **model_args)
        test_model.add_data(S_ct, C_ct, T)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # Gibbs sample
        samples = []
        lps = [test_model.log_probability()]
        hlls = [
            test_model.heldout_log_likelihood(S_test_ct, C_test_ct, T_test)
        ]
        times = [0]
        for _ in progprint_xrange(N_samples, perline=25):
            # Update the model
            tic = time.time()
            test_model.resample_model()
            times.append(time.time() - tic)

            samples.append(copy.deepcopy(test_model.get_parameters()))

            # Compute log probability and heldout log likelihood
            # lps.append(test_model.log_probability())
            hlls.append(
                test_model.heldout_log_likelihood(S_test_ct, C_test_ct,
                                                  T_test))

            # # Save this sample
            # with open(output_path + ".gibbs.itr%04d.pkl" % itr, 'w') as f:
            #     cPickle.dump(samples[-1], f, protocol=-1)

            # Check if time limit has been exceeded
            if np.sum(times) > time_limit:
                break

        # Get cumulative timestamps
        timestamps = np.cumsum(times)
        lps = np.array(lps)
        hlls = np.array(hlls)

        # Make results object
        results = Results(samples, timestamps, lps, hlls)

        # Save the Gibbs samples
        with gzip.open(output_path, 'w') as f:
            print("Saving Gibbs samples to ", output_path)
            pickle.dump(results, f, protocol=-1)

    return results
Example #50
ct_model = pyhawkes.models.ContinuousTimeNetworkHawkesModel(
    K, dt_max=1., network_hypers=network_hypers)
ct_model.add_data(S_ct, C_ct, T)
# ct.resample_model()

# Hard code parameters
ct_model.bias_model.lambda0 = dt_model.bias_model.lambda0
ct_model.weight_model.A = dt_model.weight_model.A
ct_model.weight_model.W = dt_model.weight_model.W
print("CT LL: ", ct_model.heldout_log_likelihood(S_ct, C_ct, T))

# Fit the CT model
ct_lls = [ct_model.log_likelihood()]
N_samples = 100
for itr in progprint_xrange(N_samples, perline=25):
    ct_model.resample_model()
    ct_lls.append(ct_model.log_likelihood())
    assert np.all(ct_model.weight_model.A == 1)

# Now fit a DT model
dt_model_test = pyhawkes.models.\
    DiscreteTimeNetworkHawkesModelSpikeAndSlab(K=K, dt=dt, dt_max=dt_max, B=B,
                                               network_hypers=network_hypers)
dt_model_test.add_data(S_dt)
dt_lls = []
for itr in progprint_xrange(N_samples, perline=25):
    dt_model_test.resample_model()
    dt_lls.append(dt_model_test.log_likelihood())
    assert np.all(dt_model_test.weight_model.A == 1)
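
# A natural follow-up (sketch): plot the two log-likelihood traces side by side.
plt.figure()
plt.plot(ct_lls, label="continuous time")
plt.plot(dt_lls, label="discrete time")
plt.xlabel("iteration")
plt.ylabel("log likelihood")
plt.legend()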
Example #51
                 emission_distn=BernoulliRegression(D_out=D_obs,
                                                    D_in=D_latent + D_input))
model.add_data(data, inputs=inputs, stateseq=np.zeros((T, D_latent)))

# Run a Gibbs sampler with Polya-gamma augmentation
N_samples = 50


def gibbs_update(model):
    model.resample_model()
    smoothed_obs = model.states_list[0].smooth()
    ll = model.log_likelihood()
    return ll, model.states_list[0].gaussian_states, smoothed_obs

lls_gibbs, x_smpls_gibbs, y_smooth_gibbs = \
    zip(*[gibbs_update(model) for _ in progprint_xrange(N_samples)])

# Fit with a Bernoulli LDS using Laplace approximation for comparison
model = DefaultBernoulliLDS(D_obs,
                            D_latent,
                            D_input=D_input,
                            C=0.01 * np.random.randn(D_obs, D_latent),
                            D=0.01 * np.random.randn(D_obs, D_input))
model.add_data(data, inputs=inputs, stateseq=np.zeros((T, D_latent)))

N_iters = 50


def em_update(model):
    model.EM_step(verbose=True)
    smoothed_obs = model.states_list[0].smooth()
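    # Snippet truncated at the example boundary; a plausible completion mirroring
    # gibbs_update above would be (assumption):
    # return model.log_likelihood(), model.states_list[0].gaussian_states, smoothed_obs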
Example #52
                 emission_distn=DiagonalRegression(D_obs, D_latent + D_input))
diag_model.add_data(data, inputs=inputs)

# Also fit a model with a full covariance matrix
full_model = DefaultLDS(D_obs, D_latent, D_input)
full_model.add_data(data, inputs=inputs)


# Fit with Gibbs sampling
def update(model):
    model.resample_model()
    return model.log_likelihood()


N_steps = 100
diag_lls = [update(diag_model) for _ in progprint_xrange(N_steps)]
full_lls = [update(full_model) for _ in progprint_xrange(N_steps)]

plt.figure()
plt.plot([0, N_steps],
         truemodel.log_likelihood() * np.ones(2),
         '--k',
         label="true")
plt.plot(diag_lls, label="diag cov.")
plt.plot(full_lls, label="full cov.")
plt.xlabel('iteration')
plt.ylabel('log likelihood')
plt.legend()

# Predict forward in time
T_given = 1800
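
# Hedged continuation sketch: the snippet ends mid-setup, but forward prediction
# with these models typically mirrors the sample_predictions usage seen in the
# other examples of this collection, e.g.:
# preds = full_model.sample_predictions(data[:T_given], data.shape[0] - T_given, Npred=10)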
Example #53
def fit_network_hawkes_svi(S,
                           S_test,
                           dt,
                           dt_max,
                           output_path,
                           model_args={},
                           standard_model=None,
                           N_samples=100,
                           time_limit=8 * 60 * 60,
                           delay=10.0,
                           forgetting_rate=0.25):

    T, K = S.shape

    # Check for existing Gibbs results
    if os.path.exists(output_path):
        with gzip.open(output_path, 'rb') as f:
            print("Loading SVI results from ", output_path)
            results = pickle.load(f)
    else:
        print("Fitting the data with a network Hawkes model using SVI")

        test_model = DiscreteTimeNetworkHawkesModelGammaMixtureSBM(
            K=K, dt=dt, dt_max=dt_max, **model_args)
        test_model.add_data(S)

        # Initialize with the standard model parameters
        if standard_model is not None:
            test_model.initialize_with_standard_model(standard_model)

        # Precompute F_test
        F_test = test_model.basis.convolve_with_basis(S_test)

        # TODO: Add the data in minibatches
        minibatchsize = 3000
        stepsize = (np.arange(N_samples) + delay)**(-forgetting_rate)

        # Stochastic variational inference
        samples = []
        lps = [test_model.log_probability()]
        hlls = [test_model.heldout_log_likelihood(S_test)]
        times = [0]
        for itr in progprint_xrange(N_samples):
            # Update the model
            tic = time.time()
            test_model.sgd_step(minibatchsize=minibatchsize,
                                stepsize=stepsize[itr])
            times.append(time.time() - tic)

            # Resample from variational posterior to compute log prob and hlls
            test_model.resample_from_mf()
            # samples.append(test_model.copy_sample())
            samples.append(copy.deepcopy(test_model.get_parameters()))

            # Compute log probability and heldout log likelihood
            # lps.append(test_model.log_probability())
            hlls.append(test_model.heldout_log_likelihood(S_test, F=F_test))

            # Save this sample
            # with open(output_path + ".svi.itr%04d.pkl" % itr, 'w') as f:
            #     cPickle.dump(samples[-1], f, protocol=-1)

            # Check if time limit has been exceeded
            if np.sum(times) > time_limit:
                break

        # Get cumulative timestamps
        timestamps = np.cumsum(times)
        lps = np.array(lps)
        hlls = np.array(hlls)

        # Make results object
        results = Results(samples, timestamps, lps, hlls)

        # Save the Gibbs samples
        with gzip.open(output_path, 'wb') as f:
            print("Saving SVI samples to ", output_path)
            pickle.dump(results, f, protocol=-1)

    return results
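
# An aside on the step-size schedule above: eps_t = (t + delay)**(-forgetting_rate)
# is a Robbins-Monro schedule. The classic guarantees (sum eps_t = inf,
# sum eps_t**2 < inf) require forgetting_rate in (0.5, 1], so the 0.25 default
# trades that guarantee for larger steps late in the run. A minimal look:
_steps = (np.arange(100) + 10.0) ** (-0.25)
print(_steps[0], _steps[-1])  # roughly 0.56 decaying to 0.31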
Example #54
    T = 50
    dt = 1.0
    dt_max = 3.0
    network_hypers = {'c': np.array([0], dtype=int),
                      'p': 0.5, 'kappa': 3.0, 'v': 15.0}
    weight_hypers = {"kappa_0": 3.0, "nu_0": 15.0}
    model = DiscreteTimeNetworkHawkesModelGammaMixture(K=1, dt=dt, dt_max=dt_max,
                                                       weight_hypers=weight_hypers,
                                                       network_hypers=network_hypers)
    model.generate(T=T)

    # Gibbs sample and then generate new data
    N_samples = 10000
    samples = []
    lps = []
    for itr in progprint_xrange(N_samples, perline=50):
        # Resample the model
        model.resample_model(resample_network=False)
        samples.append(model.copy_sample())
        lps.append(model.log_probability())

        # Geweke step
        model.data_list.pop()
        model.generate(T=T)


    # Compute sample statistics for second half of samples
    A_samples       = np.array([s.weight_model.A     for s in samples])
    W_samples       = np.array([s.weight_model.W     for s in samples])
    g_samples       = np.array([s.impulse_model.g    for s in samples])
    lambda0_samples = np.array([s.bias_model.lambda0 for s in samples])
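
    # Hedged follow-up sketch: compare second-half sample means against the prior,
    # assuming the weights carry a Gamma prior with shape 3.0 and rate 15.0 per the
    # hypers above, i.e. prior mean 3.0 / 15.0 = 0.2.
    offset = N_samples // 2
    print("sample E[W] =", W_samples[offset:].mean(axis=0))
    print("prior  E[W] =", 3.0 / 15.0)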
Example #55
def _collect(r):
    ll = r.log_likelihood((X, y))[~inds].sum()
    err = ((y - r.predict(X))**2).sum(1)
    mse = np.mean(err[~inds])
    return r.A.copy(), ll, mse


def _update(r):
    r.resample([(X, y)])
    return _collect(r)


# Fit the standard regression
smpls = [_collect(std_reg)]
for _ in progprint_xrange(100):
    smpls.append(_update(std_reg))
smpls = zip(*smpls)
std_As, std_lls, std_mses = tuple(map(np.array, smpls))

# Fit the robust regression
smpls = [_collect(robust_reg)]
for _ in progprint_xrange(100):
    smpls.append(_update(robust_reg))
smpls = zip(*smpls)
robust_As, robust_lls, robust_mses = tuple(map(np.array, smpls))

# Plot the inferred regression function
plt.figure(figsize=(8, 4))
xlim = (-3, 3)
ylim = abs(y).max()
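
# The snippet cuts off here; a hedged sketch of the complementary diagnostic,
# comparing held-out MSE traces for the two fits (arrays from the collection above):
plt.figure()
plt.plot(std_mses, label="standard")
plt.plot(robust_mses, label="robust")
plt.xlabel("iteration")
plt.ylabel("held-out MSE")
plt.legend()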
Example #56
    D = 10         # Number of documents
    V = 20          # Number of words
    N = 20         # Number of words per document
    alpha_beta = 1.0

    # Generate synthetic data
    data = np.random.poisson(2, (D,V))
    data = csr_matrix(data)

    # Sample a GP
    model = StickbreakingCorrelatedLDA(data, T, alpha_beta=alpha_beta)

    # Run a Geweke test
    thetas = []
    betas = []
    for itr in progprint_xrange(N_iter):
        # Resample the data
        model.generate(N, keep=True)

        # Resample the parameters
        model.resample()

        # Update our samples
        thetas.append(model.theta.copy())
        betas.append(model.beta.copy())

    # Check that the PG-Multinomial samples are distributed like the prior
    thetas = np.array(thetas)
    theta_mean = thetas.mean(0)
    theta_std  = thetas.std(0)
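
    # Hedged sketch of the usual check: z-scores of the sampled means against the
    # prior mean, assumed exchangeable over the T topics (so 1/T per component),
    # ignoring autocorrelation in the chain.
    z = (theta_mean - 1.0 / T) / (theta_std / np.sqrt(len(thetas)))
    print("max |z| =", np.abs(z).max())  # large values flag a broken sampler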
Example #57
File: test_lds.py  Project: sheqi/pyglm
                                 kappa_0=1.0, nu_0=D_in+1),
    dynamics_distn=AutoRegression(
            sigma=sigma_states, nu_0=D_in + 1, S_0=D_in * np.eye(D_in),
            M_0=np.zeros((D_in, D_in)), K_0=D_in * np.eye(D_in)),
    emission_distn=PGEmissions(D_out, D_in, C=C, sigmasq_C=1.0))
model.add_data(data.X)

N_samples = 1000
def update(model):
    model.resample_model()
    z_inf = model.states_list[0].stateseq
    C_inf = model.C
    psi_inf = z_inf.dot(C_inf.T)
    p_inf = logistic(psi_inf)
    return model.log_likelihood(), p_inf

results = [update(model) for _ in progprint_xrange(N_samples)]
lls = np.array([r[0] for r in results])
ps = np.array([r[1] for r in results])

plt.figure()
plt.plot(data.X, 'bx', ls="none")
# plt.plot(psi, 'r')
plt.plot(p_true, 'r', lw=2)
plt.errorbar(np.arange(T), ps.mean(0), yerr=ps.std(0), fmt='--r')
plt.ylim(-0.1, data.X.max() + .1)

plt.figure()
plt.plot(lls)
plt.xlabel("Iteration")
plt.ylabel("LL")
plt.show()
Example #58
def fit_gaussian_lds_model(Xs, Xtest, D_gauss_lds, N_samples=100):
    Nx = len(Xs)
    assert len(Xtest) == Nx

    print("Fitting Gaussian (Raw) LDS with %d states" % D_gauss_lds)
    from pylds.models import NonstationaryLDS
    models = [
        NonstationaryLDS(
            init_dynamics_distn=GaussianFixed(mu=np.zeros(D),
                                              sigma=1 * np.eye(D)),
            dynamics_distn=AutoRegression(nu_0=D + 1,
                                          S_0=1 * np.eye(D),
                                          M_0=np.zeros((D, D)),
                                          K_0=1 * np.eye(D)),
            emission_distn=Regression(nu_0=K + 1,
                                      S_0=K * np.eye(K),
                                      M_0=np.zeros((K, D)),
                                      K_0=K * np.eye(D))) for _ in range(Nx)
    ]

    Xs_centered = [
        X - np.mean(X, axis=0)[None, :] + 1e-3 * np.random.randn(*X.shape)
        for X in Xs
    ]
    for X, model in zip(Xs_centered, models):
        model.add_data(X)

    def compute_pred_ll():
        pred_ll = 0
        for Xtr, Xte, model in zip(Xs_centered, Xtest, models):
            # Monte Carlo sample to get pi density implied by Gaussian LDS
            Npred = 10
            Tpred = Xte.shape[0]
            preds = model.sample_predictions(Xtr, Tpred, Npred=Npred)

            # Convert predictions to a distribution by finding the
            # largest dimension for each predicted Gaussian.
            # Preds is T x K x Npred, inds is TxNpred
            inds = np.argmax(preds, axis=1)
            pi = np.array(
                [np.bincount(inds[t], minlength=K)
                 for t in range(Tpred)]) / float(Npred)
            assert np.allclose(pi.sum(axis=1), 1.0)

            pi = np.clip(pi, 1e-8, 1.0)
            pi /= pi.sum(axis=1)[:, None]

            # Compute the log likelihood under pi
            pred_ll += np.sum([
                Multinomial(weights=pi[t], K=K).log_likelihood(Xte[t][None, :])
                for t in range(Tpred)
            ])

        return pred_ll

    # Initial results: no elapsed time, lls/test_lls left as NaN, and the
    # pre-sampling predictive log likelihood
    init_results = (0, None, np.nan, np.nan, compute_pred_ll())

    def resample():
        tic = time.time()
        [model.resample_model() for model in models]
        toc = time.time() - tic

        return toc, None, np.nan, np.nan, compute_pred_ll()


    times, samples, lls, test_lls, pred_lls = \
        map(np.array, zip(*([init_results] +
            [resample() for _ in progprint_xrange(N_samples, perline=5)])))
    timestamps = np.cumsum(times)
    return Results(lls, test_lls, pred_lls, samples, timestamps)
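
# The argmax-and-bincount conversion above, shown in isolation (a tiny
# self-contained demo; all names are local to this sketch):
_preds = np.random.randn(4, 5, 10)        # stand-in (Tpred, K, Npred) samples
_inds = np.argmax(_preds, axis=1)         # winning dimension per (t, sample)
_pi = np.array([np.bincount(_inds[t], minlength=5)
                for t in range(4)]) / 10.0
assert np.allclose(_pi.sum(axis=1), 1.0)  # each row is a distribution over K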
Example #59
File: EM.py  Project: yorkerlin/pylds
D_obs = 1
D_latent = 2
D_input = 0
T = 2000

# Simulate from one LDS
truemodel = DefaultLDS(D_obs, D_latent, D_input)
inputs = np.random.randn(T, D_input)
data, stateseq = truemodel.generate(T, inputs=inputs)

# Fit with another LDS
model = DefaultLDS(D_obs, D_latent, D_input)
model.add_data(data, inputs=inputs)

# Initialize with a few iterations of Gibbs
for _ in progprint_xrange(10):
    model.resample_model()


# Run EM
def update(model):
    model.EM_step()
    return model.log_likelihood()


lls = [update(model) for _ in progprint_xrange(50)]
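
# Sanity check (sketch): EM steps should be numerically non-decreasing in log likelihood.
assert np.all(np.diff(lls) > -1e-6), "EM log likelihood decreased"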

# Plot the log likelihoods
plt.figure()
plt.plot(lls)
plt.xlabel('iteration')
Example #60
def fit_gaussian_lds_model(Xs, Xtest, D_gauss_lds, N_samples=100):
    print("Fitting Gaussian (Raw) LDS with %d states" % D_gauss_lds)
    model = DefaultLDS(n=D_gauss_lds, p=K)

    Xs_centered = [
        X - np.mean(X, axis=0)[None, :] + 1e-3 * np.random.randn(*X.shape)
        for X in Xs
    ]
    for X in Xs_centered:
        model.add_data(X)

    # TODO: Get initial pred ll
    init_results = (0, None, np.nan, np.nan, np.nan)

    def resample():
        tic = time.time()
        model.resample_model()
        toc = time.time() - tic

        # Monte Carlo sample to get pi density implied by Gaussian LDS
        Tpred = Xtest.shape[0]
        Npred = 1000

        preds = model.sample_predictions(Xs_centered[0], Tpred, Npred=Npred)

        # Convert predictions to a distribution by finding the
        # largest dimension for each predicted Gaussian.
        # Preds is T x K x Npred, inds is TxNpred
        inds = np.argmax(preds, axis=1)
        pi = np.array(
            [np.bincount(inds[t], minlength=K)
             for t in range(Tpred)]) / float(Npred)
        assert np.allclose(pi.sum(axis=1), 1.0)

        pi = np.clip(pi, 1e-8, 1.0)
        pi /= pi.sum(axis=1)[:, None]

        # Compute the log likelihood under pi
        pred_ll = np.sum([
            Multinomial(weights=pi[t], K=K).log_likelihood(Xtest[t][None, :])
            for t in xrange(Tpred)
        ])

        return toc, None, np.nan, \
            np.nan, \
            pred_ll

    n_retries = 0
    max_attempts = 5
    while n_retries < max_attempts:
        try:
            times, samples, lls, test_lls, pred_lls = \
                map(np.array, zip(*([init_results] + [resample() for _ in progprint_xrange(N_samples)])))
            timestamps = np.cumsum(times)
            return Results(lls, test_lls, pred_lls, samples, timestamps)
        except Exception as e:
            print("Caught exception: ", e.message)
            print("Retrying")
            n_retries += 1

    raise Exception("Failed to fit the Raw Gaussian LDS model in %d attempts" %
                    max_attempts)