Пример #1
0
def fit_full(model_name, J, j_ind, X, y, phi_true, m0, Q0, seed):
    """Fit the full model and save the results.

    The Stan model returned by ``load_stan(model_name)`` is sampled with
    4 chains of 1000 iterations each (500 warmup, thinning 2).  The mean and
    the unbiased (``ddof=1``) variance of the extracted ``phi`` samples are
    stored, together with ``phi_true``, in ``results/res_f_<model_name>.npz``.
    The ``results`` directory is created if it does not exist.

    Parameters
    ----------
    model_name : str
        Name given to ``load_stan`` and embedded in the output file name.
    J
        Forwarded to Stan as ``J``.
    j_ind
        Forwarded to Stan as ``j_ind`` after adding one (presumably
        zero-based group indices shifted to Stan's one-based indexing).
    X
        Array with at least two dimensions; its first two shape entries are
        forwarded as ``N`` and ``D``.
    y
        Forwarded to Stan as ``y``.
    phi_true
        Saved unchanged next to the estimates.
    m0
        Forwarded to Stan as ``mu_phi``.
    Q0
        Forwarded, transposed, to Stan as ``Omega_phi``.
    seed
        Seed handed to the sampler.
    """

    # Parenthesised single-argument print works on Python 2 and Python 3.
    print("Full model {} ...".format(model_name))

    data = dict(
        N=X.shape[0],
        D=X.shape[1],
        J=J,
        X=X,
        y=y,
        j_ind=j_ind + 1,
        mu_phi=m0,
        Omega_phi=Q0.T  # Q0 transposed in order to get C-contiguous
    )
    model = load_stan(model_name)

    # Sample and extract parameters
    with suppress_stdout():
        fit = model.sampling(data=data,
                             seed=seed,
                             chains=4,
                             iter=1000,
                             warmup=500,
                             thin=2)
    samp = fit.extract(pars='phi')['phi']
    m_phi_full = samp.mean(axis=0)
    var_phi_full = samp.var(axis=0, ddof=1)

    print("Full model sampled.")

    if not os.path.exists('results'):
        os.makedirs('results')
    np.savez(
        'results/res_f_{}.npz'.format(model_name),
        phi_true=phi_true,
        m_phi_full=m_phi_full,
        var_phi_full=var_phi_full,
    )
Пример #2
0
def main(model_name, conf, ret_master=False):
    """Fit requested model with given configurations.

    The data is simulated by the model class found in ``models.<model_name>``.
    Depending on ``conf.method`` the distributed EP model (``'distributed'``),
    the full model (``'full'``) or both (``'both'``) are fitted.  Results are
    written into ``RES_PATH`` as ``.npz`` files when ``conf.save_true`` /
    ``conf.save_res`` are set; ``conf.id``, if truthy, is appended to the file
    names.

    Parameters
    ----------
    model_name : str
        Name of the module in package ``models`` and of the Stan model file
        in ``MOD_PATH``.
    conf : configurations
        Run configurations.
    ret_master : bool, optional
        If true, the distributed branch is entered regardless of
        ``conf.method`` and the function exits prematurely, returning the
        dep.Master object, which is useful for debugging.

    Returns
    -------
    The dep.Master instance if ``ret_master`` is true, otherwise None.

    Raises
    ------
    ValueError
        If ``conf`` is not an instance of ``configurations``, if ``conf.K``
        is smaller than 2 or if it is greater than the number of samples.
    RuntimeError
        If the distributed algorithm reports a nonzero error code.  The
        results obtained so far are saved first when ``conf.save_res`` is set.
    """

    # Ensure that the configurations class is used
    if not isinstance(conf, configurations):
        raise ValueError("Invalid arg. `conf`, use class fit.configurations")

    def _res_path(prefix):
        """Return the path of result file `prefix`, creating RES_PATH."""
        if not os.path.exists(RES_PATH):
            os.makedirs(RES_PATH)
        if conf.id:
            filename = '{}_{}_{}.npz'.format(prefix, model_name, conf.id)
        else:
            filename = '{}_{}.npz'.format(prefix, model_name)
        return os.path.join(RES_PATH, filename)

    print("Configurations:")
    print('    ' + str(conf).replace('\n', '\n    '))

    # Localise few options
    J = conf.J
    D = conf.D
    K = conf.K

    # Import the model simulator module (import at runtime)
    model_module = getattr(__import__('models.'+model_name), model_name)
    model = model_module.model(J, D, conf.npg)

    # Simulate_data
    if conf.cor_input:
        data = model.simulate_data(Sigma_x='rand', seed=conf.seed_data)
    else:
        data = model.simulate_data(seed=conf.seed_data)

    # Calculate the uncertainty
    uncertainty_global, uncertainty_group = data.calc_uncertainty()

    # Get the prior
    S0, m0, Q0, r0 = model.get_prior()
    prior = {'Q': Q0, 'r': r0}

    # Set init_site to N(0,A**2/K I), where A = 10 * max(diag(S0))
    init_site = 10 * np.max(np.diag(S0))

    # Get parameter information
    pnames, pshapes, phiers = model.get_param_definitions()

    # Save true values
    if conf.save_true:
        np.savez(
            _res_path('true_vals'),
            J=J,
            D=D,
            npg=conf.npg,
            seed=conf.seed_data,
            pnames=pnames,
            uncertainty_global=uncertainty_global,
            uncertainty_group=uncertainty_group,
            X_param=data.X_param,
            **data.true_values
        )
        print("True values saved into results")

    # The multi-group Stan model is loaded lazily; the `K < J` branch below
    # and the full model share it, so remember it once it has been loaded.
    stan_model = None

    # ------------------------------------------------------
    #     Fit distributed model
    # ------------------------------------------------------
    if conf.method == 'both' or conf.method == 'distributed' or ret_master:

        print("Distributed model {} ...".format(model_name))

        # Options for the ep-algorithm see documentation of dep.method.Master
        dep_options = dict(
            prior=prior,
            seed=conf.seed_mcmc,
            prec_estim=conf.prec_estim,
            df0=conf.damp,
            init_site=init_site,
            **conf.mc_opt
        )
        # Temp fix for the RandomState seed problem with pystan in 32bit Python
        dep_options['tmp_fix_32bit'] = TMP_FIX_32BIT

        if K < 2:
            raise ValueError("K should be at least 2.")

        elif K < J:
            # ------ Many groups per site: combine groups ------
            Nk, Nj_k, j_ind_k = distribute_groups(J, K, data.Nj)
            # Create the Master instance
            stan_model = load_stan(os.path.join(MOD_PATH, model_name))
            dep_master = Master(
                stan_model,
                data.X,
                data.y,
                A_k={'J': Nj_k},
                A_n={'j_ind': j_ind_k+1},
                site_sizes=Nk,
                **dep_options
            )
            # Construct the map: which site contribute to which parameter
            pmaps = _create_pmaps(phiers, J, K, Nj_k)

        elif K == J:
            # ------ One group per site ------
            # Create the Master instance
            dep_master = Master(
                load_stan(os.path.join(MOD_PATH, model_name+'_sg')),
                data.X,
                data.y,
                site_sizes=data.Nj,
                **dep_options
            )
            # Construct the map: which site contribute to which parameter
            pmaps = _create_pmaps(phiers, J, K, None)

        elif K <= data.N:
            # ------ Multiple sites per group: split groups ------
            Nk, Nk_j, _ = distribute_groups(J, K, data.Nj)
            # Create the Master instance
            dep_master = Master(
                load_stan(os.path.join(MOD_PATH, model_name+'_sg')),
                data.X,
                data.y,
                site_sizes=Nk,
                **dep_options
            )
            # Construct the map: which site contribute to which parameter
            pmaps = _create_pmaps(phiers, J, K, Nk_j)

        else:
            raise ValueError("K cant be greater than number of samples")

        if ret_master:
            print("Returning dep.Master")
            return dep_master

        # Run the algorithm for `conf.iter` iterations
        print("Run distributed EP algorithm for {} iterations."
              .format(conf.iter))
        m_phi_i, cov_phi_i, info = dep_master.run(
            conf.iter, save_last_fits=conf.mix)
        if info:
            # Save results until failure
            if conf.save_res:
                np.savez(
                    _res_path('res_d'),
                    conf=conf.__dict__,
                    m_phi_i=m_phi_i,
                    cov_phi_i=cov_phi_i,
                    last_iter=dep_master.iter
                )
                print("Uncomplete distributed model results saved.")
            raise RuntimeError('Dep algorithm failed with error code: {}'
                               .format(info))

        if conf.mix:
            print("Form the final approximation "
                  "by mixing the last samples from all the sites.")
            cov_phi, m_phi = dep_master.mix_phi()

            # Get mean and var of inferred variables
            pms, pvars = dep_master.mix_pred(pnames, pmaps, pshapes)
            # Construct a dict of from these results
            presults = {}
            for i, pname in enumerate(pnames):
                presults['m_'+pname] = pms[i]
                presults['var_'+pname] = pvars[i]

        # Save results
        if conf.save_res:
            res_file = _res_path('res_d')
            if conf.mix:
                np.savez(
                    res_file,
                    conf=conf.__dict__,
                    m_phi_i=m_phi_i,
                    cov_phi_i=cov_phi_i,
                    m_phi=m_phi,
                    cov_phi=cov_phi,
                    **presults
                )
            else:
                np.savez(
                    res_file,
                    conf=conf.__dict__,
                    m_phi_i=m_phi_i,
                    cov_phi_i=cov_phi_i,
                )
            print("Distributed model results saved.")

        # Release master object
        del dep_master

    # ------------------------------------------------------
    #     Fit full model
    # ------------------------------------------------------
    if conf.method == 'both' or conf.method == 'full':

        print("Full model {} ...".format(model_name))

        seed = np.random.RandomState(seed=conf.seed_mcmc)
        # Temp fix for the RandomState seed problem with pystan in 32bit Python
        seed = seed.randint(2**31-1) if TMP_FIX_32BIT else seed

        # Kept under its own name so that the simulated `data` object is not
        # shadowed by the dict handed to Stan.
        stan_data = dict(
            N=data.X.shape[0],
            D=data.X.shape[1],
            J=J,
            X=data.X,
            y=data.y,
            j_ind=data.j_ind+1,
            mu_phi=m0,
            Omega_phi=Q0.T    # Q0 transposed in order to get C-contiguous
        )
        # Load model if not loaded already
        if stan_model is None:
            stan_model = load_stan(os.path.join(MOD_PATH, model_name))

        # Sample and extract parameters
        with suppress_stdout():
            time_start = timer()
            fit = stan_model.sampling(
                data=stan_data,
                seed=seed,
                **conf.mc_full_opt
            )
            time_full = timer() - time_start

        samp = fit.extract(pars='phi')['phi']
        nsamp = samp.shape[0]
        m_phi_full = samp.mean(axis=0)
        # Unbiased sample covariance; centring into a new array leaves the
        # array returned by `fit.extract` untouched.
        centered = samp - m_phi_full
        cov_phi_full = centered.T.dot(centered)
        cov_phi_full /= nsamp - 1

        # Mean stepsize
        steps = [np.mean(p['stepsize__'])
                 for p in fit.get_sampler_params()]
        print('    sampling time {}'.format(time_full))
        print('    mean stepsize: {:.4}'.format(np.mean(steps)))
        # Max Rhat (from all but last row in the last column)
        print('    max Rhat: {:.4}'.format(
            np.max(fit.summary()['summary'][:-1, -1])
        ))

        # Get mean and var of inferred variables
        presults = {}
        for pname in pnames:
            psamp = fit.extract(pname)[pname]
            presults['m_'+pname+'_full'] = np.mean(psamp, axis=0)
            presults['var_'+pname+'_full'] = np.var(psamp, axis=0, ddof=1)

        # Save results
        if conf.save_res:
            np.savez(
                _res_path('res_f'),
                conf=conf.__dict__,
                m_phi_full=m_phi_full,
                cov_phi_full=cov_phi_full,
                **presults
            )
            print("Full model results saved.")
Пример #3
0
def main(filename='res.npz'):
    """Simulate grouped data, run the distributed EP algorithm and save.

    All settings (``SEED_DATA``, ``SEED_MCMC``, ``J``, ``K``, ``NPG``, ``MU``,
    ``TAU``, ``BETA``, ``SIGMA``, ``X_MU``, ``X_STD``, ``M0``, ``V0``,
    ``EP_ITER`` and the sampler options) are read from module-level
    constants.  The ``J`` groups are distributed over ``K`` sites: groups are
    combined when ``K < J``, used as such when ``K == J`` and split when
    ``J < K <= N``.

    Parameters
    ----------
    filename : str, optional
        Target of ``np.savez`` for the results.

    Raises
    ------
    ValueError
        If ``K`` is smaller than 2 or greater than the number of samples.
    RuntimeError
        If ``dep_master.run`` reports a nonzero error code.
    """

    # ------------------------------------------------------
    #     Simulate data
    # ------------------------------------------------------

    # Set seed
    rnd_data = np.random.RandomState(seed=SEED_DATA)

    # Parameters
    # Number of observations for each group
    if hasattr(NPG, '__getitem__') and len(NPG) == 2:
        Nj = rnd_data.randint(NPG[0], NPG[1]+1, size=J)
    else:
        Nj = NPG*np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Observation index limits for J groups
    j_lim = np.concatenate(([0], np.cumsum(Nj)))
    # Group indices for each sample: index j repeated Nj[j] times
    j_ind = np.repeat(np.arange(J, dtype=np.int64), Nj)

    # Assign parameters
    alpha_j = MU + rnd_data.randn(J)*TAU
    phi_true = np.log([MU, TAU, BETA, SIGMA])
    dphi = 4  # Number of shared parameters

    # Simulate data
    # Truncated normal rejection sampling
    X = X_MU + rnd_data.randn(N)*X_STD
    xneg = X < 0
    while np.any(xneg):
        X[xneg] = X_MU + rnd_data.randn(np.count_nonzero(xneg))*X_STD
        xneg = X < 0
    f = alpha_j[j_ind] + X*BETA
    y = f + rnd_data.randn(N)*SIGMA
    yneg = y < 0
    while np.any(yneg):
        y[yneg] = f[yneg] + rnd_data.randn(np.count_nonzero(yneg))*SIGMA
        yneg = y < 0

    # ------------------------------------------------------
    #     Prior
    # ------------------------------------------------------

    # Natural parameters of the prior (transposed in order to get
    # F-contiguous)
    Q0 = np.diag(np.ones(dphi)/V0).T
    r0 = M0/V0
    prior = {'Q': Q0, 'r': r0}

    # ------------------------------------------------------
    #     Distributed EP
    # ------------------------------------------------------

    # Parenthesised single-argument print works on Python 2 and Python 3.
    print("Distributed model...")

    # Options for the ep-algorithm see documentation of dep.method.Master
    options = {
        'seed'       : SEED_MCMC,
        'init_prev'  : True,
        'prec_estim' : PREC_ESTIM,
        'chains'     : CHAINS,
        'iter'       : ITER,
        'warmup'     : WARMUP,
        'thin'       : THIN
    }

    # Temp fix for the RandomState seed problem with pystan in 32bit Python
    options['tmp_fix_32bit'] = TMP_FIX_32BIT

    model = load_stan('model')
    if K < 2:
        raise ValueError("K should be at least 2.")
    elif K < J:
        # ---- Many groups per site ----
        # Combine smallest pairs of consecutive groups until K has been reached
        Nk = Nj.tolist()                        # Samples per (merged) site
        Njd = (Nj[:-1]+Nj[1:]).tolist()         # Size of each adjacent pair
        Nj_k = [1]*J                            # Groups per (merged) site
        for _ in range(J-K):
            ind = Njd.index(min(Njd))
            # The neighbouring pairs grow by the part merged into them
            if ind+1 < len(Njd):
                Njd[ind+1] += Nk[ind]
            if ind > 0:
                Njd[ind-1] += Nk[ind+1]
            Nk[ind] = Njd[ind]
            Nk.pop(ind+1)
            Njd.pop(ind)
            Nj_k[ind] += Nj_k[ind+1]
            Nj_k.pop(ind+1)
        Nk = np.array(Nk)                       # Number of samples per site
        Nj_k = np.array(Nj_k)                   # Number of groups per site
        j_ind_k = np.empty(N, dtype=np.int32)   # Within site group index
        k_lim = np.concatenate(([0], np.cumsum(Nj_k)))
        for k in range(K):
            for ji in range(Nj_k[k]):
                ki = ji + k_lim[k]
                j_ind_k[j_lim[ki]:j_lim[ki+1]] = ji
        # Create the Master instance
        dep_master = Master(
            model,
            X,
            y,
            A_k={'J': Nj_k},
            A_n={'j_ind': j_ind_k+1},
            site_sizes=Nk,
            prior=prior,
            **options
        )
    elif K == J:
        # ---- One group per site ----
        # Create the Master instance
        dep_master = Master(
            model,
            X,
            y,
            A_k={'J': np.ones(K, dtype=np.int64)},
            A_n={'j_ind': np.ones(N, dtype=np.int64)},
            site_sizes=Nj,
            prior=prior,
            **options
        )
    elif K <= N:
        # ---- Multiple sites per group ----
        # Split biggest groups until enough sites are formed
        ppg = np.ones(J, dtype=np.int64)    # Parts per group
        # `np.float` was only an alias of the builtin and has been removed
        # from NumPy.
        Nj2 = Nj.astype(float)              # Current size of a part per group
        for _ in range(K-J):
            cur_max = Nj2.argmax()
            ppg[cur_max] += 1
            # Explicit float(): true division also on Python 2, where the
            # integer operands would otherwise be floor-divided.
            Nj2[cur_max] = float(Nj[cur_max])/ppg[cur_max]
        Nj2 = Nj//ppg
        rem = Nj % ppg
        # Form the number of samples for each site: the first `rem[j]` parts
        # of group j get one extra sample
        Nk = np.empty(K, dtype=np.int64)
        k = 0
        for j in range(J):
            for kj in range(ppg[j]):
                if kj < rem[j]:
                    Nk[k] = Nj2[j] + 1
                else:
                    Nk[k] = Nj2[j]
                k += 1
        # Create the Master instance
        dep_master = Master(
            model,
            X,
            y,
            A_k={'J': np.ones(K, dtype=np.int64)},
            A_n={'j_ind': np.ones(N, dtype=np.int64)},
            site_sizes=Nk,
            prior=prior,
            **options
        )
    else:
        raise ValueError("K cant be greater than number of samples")

    # Run the algorithm for `EP_ITER` iterations
    print("Run distributed EP algorithm for {} iterations.".format(EP_ITER))
    m_phi, cov_phi, info = dep_master.run(EP_ITER)
    var_phi = np.diagonal(cov_phi, axis1=1, axis2=2)
    if info:
        raise RuntimeError('Dep algorithm failed with error code: {}'
                           .format(info))
    print("Form the final approximation "
          "by mixing the samples from all the sites.")
    S_mix, m_mix = dep_master.mix_phi()
    var_mix = np.diag(S_mix)

    print("Distributed model sampled:")
    print("    exp(phi) = {}".format(
        np.array2string(np.exp(m_mix), precision=1)))
    print("True values:")
    print("    exp(phi) = {}".format([MU, TAU, BETA, SIGMA]))

    # ------------------------------------------------------
    #     Save results
    # ------------------------------------------------------

    np.savez(filename,
        seed_data=SEED_DATA,
        seed_mcmc=SEED_MCMC,
        J=J,
        K=K,
        Nj=Nj,
        N=N,
        dphi=dphi,
        niter=EP_ITER,
        m0=M0,
        V0=V0,
        phi_true=phi_true,
        m_phi=m_phi,
        var_phi=var_phi,
        m_mix=m_mix,
        var_mix=var_mix
    )
Пример #4
0
def main(filename='res_full.npz'):
    """Simulate grouped data, sample the full Stan model and save the results.

    All settings (``SEED_DATA``, ``SEED_MCMC``, ``J``, ``K``, ``NPG``, ``MU``,
    ``TAU``, ``BETA``, ``SIGMA``, ``X_MU``, ``X_STD``, ``M0``, ``V0`` and the
    sampler options ``CHAINS``, ``ITER``, ``WARMUP``, ``THIN``) are read from
    module-level constants.  The mean and the unbiased variance of the
    sampled ``phi`` are printed (exponentiated mean) and saved.

    Parameters
    ----------
    filename : str, optional
        Target of ``np.savez`` for the results.
    """

    # ------------------------------------------------------
    #     Simulate data
    # ------------------------------------------------------

    # Set seed
    rnd_data = np.random.RandomState(seed=SEED_DATA)

    # Parameters
    # Number of observations for each group
    if hasattr(NPG, '__getitem__') and len(NPG) == 2:
        Nj = rnd_data.randint(NPG[0], NPG[1]+1, size=J)
    else:
        Nj = NPG*np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Group indices for each sample: index j repeated Nj[j] times
    j_ind = np.repeat(np.arange(J, dtype=np.int64), Nj)

    # Assign parameters
    alpha_j = MU + rnd_data.randn(J)*TAU
    phi_true = np.log([MU, TAU, BETA, SIGMA])
    dphi = 4  # Number of shared parameters

    # Simulate data
    # Truncated normal rejection sampling
    X = X_MU + rnd_data.randn(N)*X_STD
    xneg = X < 0
    while np.any(xneg):
        X[xneg] = X_MU + rnd_data.randn(np.count_nonzero(xneg))*X_STD
        xneg = X < 0
    f = alpha_j[j_ind] + X*BETA
    y = f + rnd_data.randn(N)*SIGMA
    yneg = y < 0
    while np.any(yneg):
        y[yneg] = f[yneg] + rnd_data.randn(np.count_nonzero(yneg))*SIGMA
        yneg = y < 0

    # ------------------------------------------------------
    #     Prior
    # ------------------------------------------------------

    # Prior mean
    m0 = M0
    # Prior precision (transposed in order to get F-contiguous)
    Q0 = np.diag(np.ones(dphi)/V0).T

    # ------------------------------------------------------
    #     Full model
    # ------------------------------------------------------

    # Parenthesised single-argument print works on Python 2 and Python 3.
    print("Full model...")

    # Set seed
    rnd_mcmc = np.random.RandomState(seed=SEED_MCMC)

    data = dict(
        N=N,
        J=J,
        X=X,
        y=y,
        j_ind=j_ind+1,
        mu_phi=m0,
        Omega_phi=Q0.T    # Q0 transposed in order to get C-contiguous
    )

    # Sample and extract parameters
    model = load_stan('model')
    fit = model.sampling(
        data=data,
        # Temp fix for the RandomState seed problem with pystan in 32bit
        # Python: pass a plain integer drawn from the RandomState instead
        seed=(rnd_mcmc.randint(2**31-1) if TMP_FIX_32BIT else rnd_mcmc),
        chains=CHAINS,
        iter=ITER,
        warmup=WARMUP,
        thin=THIN
    )
    samp = fit.extract(pars='phi')['phi']
    m_phi_full = samp.mean(axis=0)
    var_phi_full = samp.var(axis=0, ddof=1)

    print("Full model sampled:")
    print("    exp(phi) = {}"
          .format(np.array2string(np.exp(m_phi_full), precision=1)))
    print("True values:")
    print("    exp(phi) = {}".format([MU, TAU, BETA, SIGMA]))

    # ------------------------------------------------------
    #     Save results
    # ------------------------------------------------------

    np.savez(filename,
        seed_data=SEED_DATA,
        seed_mcmc=SEED_MCMC,
        J=J,
        K=K,
        Nj=Nj,
        N=N,
        dphi=dphi,
        m0=M0,
        V0=V0,
        phi_true=phi_true,
        m_phi_full=m_phi_full,
        var_phi_full=var_phi_full
    )
Пример #5
0
def _combine_groups(J, K, Nj):
    """Merge the smallest consecutive group pairs until K sites remain.

    Returns the number of samples per site, the number of groups per site
    and, for every sample, the zero-based index of its group within its site.
    """
    N = Nj.sum()
    j_lim = np.concatenate(([0], np.cumsum(Nj)))
    Nk = Nj.tolist()                        # Samples per (merged) site
    Njd = (Nj[:-1] + Nj[1:]).tolist()       # Size of each adjacent pair
    Nj_k = [1] * J                          # Groups per (merged) site
    for _ in range(J - K):
        ind = Njd.index(min(Njd))
        # The neighbouring pairs grow by the part merged into them
        if ind + 1 < len(Njd):
            Njd[ind + 1] += Nk[ind]
        if ind > 0:
            Njd[ind - 1] += Nk[ind + 1]
        Nk[ind] = Njd[ind]
        Nk.pop(ind + 1)
        Njd.pop(ind)
        Nj_k[ind] += Nj_k[ind + 1]
        Nj_k.pop(ind + 1)
    Nk = np.array(Nk)  # Number of samples per site
    Nj_k = np.array(Nj_k)  # Number of groups per site
    j_ind_k = np.empty(N, dtype=np.int32)  # Within site group index
    k_lim = np.concatenate(([0], np.cumsum(Nj_k)))
    for k in range(K):
        for ji in range(Nj_k[k]):
            ki = ji + k_lim[k]
            j_ind_k[j_lim[ki]:j_lim[ki + 1]] = ji
    return Nk, Nj_k, j_ind_k


def _split_groups(J, K, Nj):
    """Split the biggest groups until K sites exist; return samples per site."""
    ppg = np.ones(J, dtype=np.int64)  # Parts per group
    # `np.float` was only an alias of the builtin and has been removed from
    # NumPy.
    Nj2 = Nj.astype(float)  # Current size of a part per group
    for _ in range(K - J):
        cur_max = Nj2.argmax()
        ppg[cur_max] += 1
        # Explicit float(): true division also on Python 2, where the integer
        # operands would otherwise be floor-divided.
        Nj2[cur_max] = float(Nj[cur_max]) / ppg[cur_max]
    Nj2 = Nj // ppg
    rem = Nj % ppg
    # Form the number of samples for each site: the first `rem[j]` parts of
    # group j get one extra sample
    Nk = np.empty(K, dtype=np.int64)
    k = 0
    for j in range(J):
        for kj in range(ppg[j]):
            if kj < rem[j]:
                Nk[k] = Nj2[j] + 1
            else:
                Nk[k] = Nj2[j]
            k += 1
    return Nk


def fit_distributed(model_name, niter, J, K, Nj, X, y, phi_true, options):
    """Fit distributed model and save the results.

    The ``J`` groups are distributed over ``K`` sites: groups are combined
    when ``K < J`` (Stan model ``model_name``), used as such when ``K == J``
    and split when ``J < K <= N`` (Stan model ``model_name + '_sg'`` in both
    cases).  The results are written to ``results/res_d_<model_name>.npz``;
    the ``results`` directory is created if it does not exist.

    Parameters
    ----------
    model_name : str
        Name given to ``load_stan`` and embedded in the output file name.
    niter : int
        Number of iterations passed to ``Master.run``.
    J : int
        Number of groups.
    K : int
        Number of sites.
    Nj : ndarray of int
        Number of samples in each group.
    X, y
        Data forwarded to ``Master``.
    phi_true
        Saved unchanged next to the estimates.
    options : dict
        Extra keyword arguments forwarded to ``Master``.

    Raises
    ------
    ValueError
        If ``K`` is smaller than 2 or greater than the number of samples.
    """

    # Parenthesised single-argument print works on Python 2 and Python 3.
    print("Distributed model {} ...".format(model_name))

    N = Nj.sum()

    if K < 2:
        raise ValueError("K should be at least 2.")
    elif K < J:
        # ---- Many groups per site ----
        Nk, Nj_k, j_ind_k = _combine_groups(J, K, Nj)
        # Create the Master instance
        model = load_stan(model_name)
        dep_master = Master(model,
                            X,
                            y,
                            A_k={'J': Nj_k},
                            A_n={'j_ind': j_ind_k + 1},
                            site_sizes=Nk,
                            **options)
    elif K == J:
        # ---- One group per site ----
        # Create the Master instance
        model_single_group = load_stan(model_name + '_sg')
        dep_master = Master(model_single_group, X, y, site_sizes=Nj, **options)
    elif K <= N:
        # ---- Multiple sites per group ----
        Nk = _split_groups(J, K, Nj)
        # Create the Master instance
        model_single_group = load_stan(model_name + '_sg')
        dep_master = Master(model_single_group, X, y, site_sizes=Nk, **options)
    else:
        raise ValueError("K cant be greater than number of samples")

    # Run the algorithm for `niter` iterations
    print("Run distributed EP algorithm for {} iterations.".format(niter))
    m_phi, var_phi = dep_master.run(niter)
    print("Form the final approximation "
          "by mixing the samples from all the sites.")
    S_mix, m_mix = dep_master.mix_samples()
    var_mix = np.diag(S_mix)

    print("Distributed model sampled.")

    if not os.path.exists('results'):
        os.makedirs('results')
    np.savez(
        'results/res_d_{}.npz'.format(model_name),
        phi_true=phi_true,
        m_phi=m_phi,
        var_phi=var_phi,
        m_mix=m_mix,
        var_mix=var_mix,
    )
Пример #6
0
def main(model_name, conf, ret_master=False):
    """Fit requested model with given configurations.

    The data is simulated by the model class found in ``models.<model_name>``.
    Depending on ``conf.method`` the distributed EP model (``'distributed'``),
    the full model (``'full'``) or both (``'both'``) are fitted.  Results are
    written into ``RES_PATH`` as ``.npz`` files when ``conf.save_true`` /
    ``conf.save_res`` are set; ``conf.id``, if truthy, is appended to the file
    names.

    Parameters
    ----------
    model_name : str
        Name of the module in package ``models`` and of the Stan model file
        in ``MOD_PATH``.
    conf : configurations
        Run configurations.
    ret_master : bool, optional
        If true, the distributed branch is entered regardless of
        ``conf.method`` and the function exits prematurely, returning the
        dep.Master object, which is useful for debugging.

    Returns
    -------
    The dep.Master instance if ``ret_master`` is true, otherwise None.

    Raises
    ------
    ValueError
        If ``conf`` is not an instance of ``configurations``, if ``conf.K``
        is smaller than 2 or if it is greater than the number of samples.
    RuntimeError
        If the distributed algorithm reports a nonzero error code.  The
        results obtained so far are saved first when ``conf.save_res`` is set.
    """

    # Ensure that the configurations class is used
    if not isinstance(conf, configurations):
        raise ValueError("Invalid arg. `conf`, use class fit.configurations")

    def _res_path(prefix):
        """Return the path of result file `prefix`, creating RES_PATH."""
        if not os.path.exists(RES_PATH):
            os.makedirs(RES_PATH)
        if conf.id:
            filename = '{}_{}_{}.npz'.format(prefix, model_name, conf.id)
        else:
            filename = '{}_{}.npz'.format(prefix, model_name)
        return os.path.join(RES_PATH, filename)

    # Parenthesised single-argument print works on Python 2 and Python 3.
    print("Configurations:")
    print('    ' + str(conf).replace('\n', '\n    '))

    # Localise few options
    J = conf.J
    D = conf.D
    K = conf.K

    # Import the model simulator module (import at runtime)
    model_module = getattr(__import__('models.'+model_name), model_name)
    model = model_module.model(J, D, conf.npg)

    # Simulate_data
    if conf.cor_input:
        data = model.simulate_data(Sigma_x='rand', seed=conf.seed_data)
    else:
        data = model.simulate_data(seed=conf.seed_data)

    # Calculate the uncertainty
    uncertainty_global, uncertainty_group = data.calc_uncertainty()

    # Get the prior
    S0, m0, Q0, r0 = model.get_prior()
    prior = {'Q': Q0, 'r': r0}

    # Get parameter information
    pnames, pshapes, phiers = model.get_param_definitions()

    # Save true values
    if conf.save_true:
        np.savez(
            _res_path('true_vals'),
            J=J,
            D=D,
            npg=conf.npg,
            seed=conf.seed_data,
            pnames=pnames,
            uncertainty_global=uncertainty_global,
            uncertainty_group=uncertainty_group,
            X_param=data.X_param,
            **data.true_values
        )
        print("True values saved into results")

    # The multi-group Stan model is loaded lazily; the `K < J` branch below
    # and the full model share it, so remember it once it has been loaded.
    stan_model = None

    # ------------------------------------------------------
    #     Fit distributed model
    # ------------------------------------------------------
    if conf.method == 'both' or conf.method == 'distributed' or ret_master:

        print("Distributed model {} ...".format(model_name))

        # Options for the ep-algorithm see documentation of dep.method.Master
        dep_options = dict(
            prior=prior,
            seed=conf.seed_mcmc,
            prec_estim=conf.prec_estim,
            **conf.mc_opt
        )
        # Temp fix for the RandomState seed problem with pystan in 32bit Python
        dep_options['tmp_fix_32bit'] = TMP_FIX_32BIT

        if K < 2:
            raise ValueError("K should be at least 2.")

        elif K < J:
            # ------ Many groups per site: combine groups ------
            Nk, Nj_k, j_ind_k = distribute_groups(J, K, data.Nj)
            # Create the Master instance
            stan_model = load_stan(os.path.join(MOD_PATH, model_name))
            dep_master = Master(
                stan_model,
                data.X,
                data.y,
                A_k={'J': Nj_k},
                A_n={'j_ind': j_ind_k+1},
                site_sizes=Nk,
                **dep_options
            )
            # Construct the map: which site contribute to which parameter
            pmaps = _create_pmaps(phiers, J, K, Nj_k)

        elif K == J:
            # ------ One group per site ------
            # Create the Master instance
            dep_master = Master(
                load_stan(os.path.join(MOD_PATH, model_name+'_sg')),
                data.X,
                data.y,
                site_sizes=data.Nj,
                **dep_options
            )
            # Construct the map: which site contribute to which parameter
            pmaps = _create_pmaps(phiers, J, K, None)

        elif K <= data.N:
            # ------ Multiple sites per group: split groups ------
            Nk, Nk_j, _ = distribute_groups(J, K, data.Nj)
            # Create the Master instance
            dep_master = Master(
                load_stan(os.path.join(MOD_PATH, model_name+'_sg')),
                data.X,
                data.y,
                site_sizes=Nk,
                **dep_options
            )
            # Construct the map: which site contribute to which parameter
            pmaps = _create_pmaps(phiers, J, K, Nk_j)

        else:
            raise ValueError("K cant be greater than number of samples")

        if ret_master:
            print("Returning dep.Master")
            return dep_master

        # Run the algorithm for `conf.iter` iterations
        print("Run distributed EP algorithm for {} iterations."
              .format(conf.iter))
        m_phi_i, cov_phi_i, info = dep_master.run(conf.iter)
        if info:
            # Save results until failure
            if conf.save_res:
                np.savez(
                    _res_path('res_d'),
                    conf=conf.__dict__,
                    m_phi_i=m_phi_i,
                    cov_phi_i=cov_phi_i,
                    last_iter=dep_master.iter
                )
                print("Uncomplete distributed model results saved.")
            raise RuntimeError('Dep algorithm failed with error code: {}'
                               .format(info))
        print("Form the final approximation "
              "by mixing the samples from all the sites.")
        cov_phi, m_phi = dep_master.mix_phi()

        # Get mean and var of inferred variables
        pms, pvars = dep_master.mix_pred(pnames, pmaps, pshapes)
        # Construct a dict of from these results
        presults = {}
        for i, pname in enumerate(pnames):
            presults['m_'+pname] = pms[i]
            presults['var_'+pname] = pvars[i]

        # Save results
        if conf.save_res:
            np.savez(
                _res_path('res_d'),
                conf=conf.__dict__,
                m_phi_i=m_phi_i,
                cov_phi_i=cov_phi_i,
                m_phi=m_phi,
                cov_phi=cov_phi,
                **presults
            )
            print("Distributed model results saved.")

        # Release master object
        del dep_master

    # ------------------------------------------------------
    #     Fit full model
    # ------------------------------------------------------
    if conf.method == 'both' or conf.method == 'full':

        print("Full model {} ...".format(model_name))

        seed = np.random.RandomState(seed=conf.seed_mcmc)
        # Temp fix for the RandomState seed problem with pystan in 32bit Python
        seed = seed.randint(2**31-1) if TMP_FIX_32BIT else seed

        # Kept under its own name so that the simulated `data` object is not
        # shadowed by the dict handed to Stan.
        stan_data = dict(
            N=data.X.shape[0],
            D=data.X.shape[1],
            J=J,
            X=data.X,
            y=data.y,
            j_ind=data.j_ind+1,
            mu_phi=m0,
            Omega_phi=Q0.T    # Q0 transposed in order to get C-contiguous
        )
        # Load model if not loaded already
        if stan_model is None:
            stan_model = load_stan(os.path.join(MOD_PATH, model_name))

        # Sample and extract parameters
        with suppress_stdout():
            fit = stan_model.sampling(
                data=stan_data,
                seed=seed,
                **conf.mc_full_opt
            )
        samp = fit.extract(pars='phi')['phi']
        nsamp = samp.shape[0]
        m_phi_full = samp.mean(axis=0)
        # Unbiased sample covariance; centring into a new array leaves the
        # array returned by `fit.extract` untouched.
        centered = samp - m_phi_full
        cov_phi_full = centered.T.dot(centered)
        cov_phi_full /= nsamp - 1

        # Mean stepsize
        steps = [np.mean(p['stepsize__'])
                 for p in fit.get_sampler_params()]
        print('    mean stepsize: {:.4}'.format(np.mean(steps)))
        # Max Rhat (from all but last row in the last column)
        print('    max Rhat: {:.4}'.format(
            np.max(fit.summary()['summary'][:-1, -1])
        ))

        # Get mean and var of inferred variables
        presults = {}
        for pname in pnames:
            psamp = fit.extract(pname)[pname]
            presults['m_'+pname+'_full'] = np.mean(psamp, axis=0)
            presults['var_'+pname+'_full'] = np.var(psamp, axis=0, ddof=1)

        # Save results
        if conf.save_res:
            np.savez(
                _res_path('res_f'),
                conf=conf.__dict__,
                m_phi_full=m_phi_full,
                cov_phi_full=cov_phi_full,
                **presults
            )
            print("Full model results saved.")
Пример #7
0
def main(filename='res_full.npz'):
    """Simulate grouped data, fit the full Stan model and save the results.

    Data are simulated with a random state seeded by SEED_DATA: a group
    intercept ``alpha_j`` is drawn for each of the J groups, and both the
    covariate X and the response y are redrawn until every value is
    non-negative. The Stan model named 'model' is then sampled with a
    Gaussian prior on phi given by mean M0 and a diagonal precision matrix
    built from V0, and the posterior sample mean and variance of ``phi``
    are stored together with the simulation settings.

    The simulation and sampling settings are read from module-level
    constants (SEED_DATA, SEED_MCMC, NPG, J, K, MU, TAU, BETA, SIGMA,
    X_MU, X_STD, M0, V0, CHAINS, ITER, WARMUP, THIN, TMP_FIX_32BIT).

    Parameters
    ----------
    filename : str, optional
        Destination passed to ``np.savez`` (default 'res_full.npz').

    """

    # ------------------------------------------------------
    #     Simulate data
    # ------------------------------------------------------

    # Set seed
    rnd_data = np.random.RandomState(seed=SEED_DATA)

    # Parameters
    # Number of observations for each group: drawn from the inclusive range
    # [NPG[0], NPG[1]] when NPG is a pair, otherwise the same for all groups
    if hasattr(NPG, '__getitem__') and len(NPG) == 2:
        Nj = rnd_data.randint(NPG[0], NPG[1] + 1, size=J)
    else:
        Nj = NPG * np.ones(J, dtype=np.int64)
    # Total number of observations
    N = np.sum(Nj)
    # Group indices for each sample (group j repeated Nj[j] times, in order)
    j_ind = np.repeat(np.arange(J, dtype=np.int64), Nj)

    # Assign parameters
    # NOTE: the order of the draws from rnd_data below must not be changed,
    # otherwise the simulated data set differs for the same SEED_DATA.
    alpha_j = MU + rnd_data.randn(J) * TAU
    phi_true = np.log([MU, TAU, BETA, SIGMA])
    dphi = 4  # Number of shared parameters

    # Simulate data
    # Truncated normal rejection sampling: redraw negative entries until
    # none are left
    X = X_MU + rnd_data.randn(N) * X_STD
    xneg = X < 0
    while np.any(xneg):
        X[xneg] = X_MU + rnd_data.randn(np.count_nonzero(xneg)) * X_STD
        xneg = X < 0
    f = alpha_j[j_ind] + X * BETA
    y = f + rnd_data.randn(N) * SIGMA
    yneg = y < 0
    while np.any(yneg):
        y[yneg] = f[yneg] + rnd_data.randn(np.count_nonzero(yneg)) * SIGMA
        yneg = y < 0

    # ------------------------------------------------------
    #     Prior
    # ------------------------------------------------------

    # Mean of the prior
    m0 = M0
    # Precision matrix of the prior (transposed in order to get F-contiguous)
    Q0 = np.diag(np.ones(dphi) / V0).T

    # ------------------------------------------------------
    #     Full model
    # ------------------------------------------------------

    print("Full model...")

    # Set seed
    rnd_mcmc = np.random.RandomState(seed=SEED_MCMC)

    data = dict(
        N=N,
        J=J,
        X=X,
        y=y,
        j_ind=j_ind + 1,  # Shifted to 1-based group indices
        mu_phi=m0,
        Omega_phi=Q0.T  # Q0 transposed in order to get C-contiguous
    )

    # Sample and extract parameters
    model = load_stan('model')
    fit = model.sampling(
        data=data,
        # With TMP_FIX_32BIT set, an integer seed drawn from rnd_mcmc is
        # passed instead of the RandomState object itself
        seed=(rnd_mcmc.randint(2**31 - 1) if TMP_FIX_32BIT else rnd_mcmc),
        chains=CHAINS,
        iter=ITER,
        warmup=WARMUP,
        thin=THIN)
    samp = fit.extract(pars='phi')['phi']
    m_phi_full = samp.mean(axis=0)
    var_phi_full = samp.var(axis=0, ddof=1)

    print("Full model sampled:")
    print("    exp(phi) = {}".format(
        np.array2string(np.exp(m_phi_full), precision=1)))
    print("True values:")
    print("    exp(phi) = {}".format([MU, TAU, BETA, SIGMA]))

    # ------------------------------------------------------
    #     Save results
    # ------------------------------------------------------

    np.savez(filename,
             seed_data=SEED_DATA,
             seed_mcmc=SEED_MCMC,
             J=J,
             K=K,
             Nj=Nj,
             N=N,
             dphi=dphi,
             m0=M0,
             V0=V0,
             phi_true=phi_true,
             m_phi_full=m_phi_full,
             var_phi_full=var_phi_full)