Example #1
def GVA(model, max_steps=10, mu=None, L=None, nb_samples=100):

    if mu is None:
        mu = np.zeros(model.size)
    if L is None:
        L = 2 * np.eye(model.size)

    for i in range(max_steps):
        eta = np.random.randn(model.size, nb_samples)

        nlp_in = mu + np.transpose(mat.exp(L) @ eta)

        elbo_grad_L = 0
        elbo_grad_mu = 0
        for n in range(nlp_in.shape[0]):
            elbo_grad_mu -= model.neg_log_posterior_grad(
                nlp_in[n, :]) / nb_samples
            elbo_grad_L += mat.exp(2 * L) @ model.neg_log_posterior_hessian(
                nlp_in[n, :])

        elbo_grad_L = -mat.sym(elbo_grad_L / nb_samples) + np.eye(len(mu))

        #take a small gradient step on the Monte Carlo ELBO estimate
        mu += 1e-3 * elbo_grad_mu / nb_samples
        L += 1e-3 * elbo_grad_L / nb_samples

        update_progress((i + 1) / max_steps)

    # return values
    return mu, L
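
#GVA above relies on a `mat` helper module that is not shown in this file. The
#stand-in below is a hypothetical sketch, assuming `mat.exp` is the matrix
#exponential (not element-wise) and `mat.sym` the symmetrization used in the
#ELBO gradient; the real helper may differ.
#
# mat.py (illustrative)
import numpy as np
from scipy.linalg import expm


def exp(A):
    """Matrix exponential of a square array."""
    return expm(np.asarray(A, dtype=float))


def sym(A):
    """Symmetric part of a square array, (A + A.T) / 2."""
    A = np.asarray(A, dtype=float)
    return 0.5 * (A + A.T)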
def stochastic_gd(model,
                  step_size=1e-4,
                  max_iter=100,
                  trace=False,
                  RETURN=False,
                  save=True):

    X = model.data.copy()
    y = model.response.copy()

    initial = np.random.rand(model.size)
    #dirty fix for the variance
    initial[0] += 1

    trace_theta = []
    trace_energy = []

    theta = initial
    start = time.time()

    fun = model.neg_log_posterior
    grad_fun = model.neg_log_posterior_grad

    batch_size = 50
    for i in range(max_iter):

        #compute gradient on only a subset of the data points
        for y_batch, x_batch in batch_iter(y, X, batch_size=batch_size):
            #update and append
            theta = theta - step_size * grad_fun(theta, X=x_batch, y=y_batch)
            trace_theta.append(theta)
            trace_energy.append(fun(theta))

        #convergence check
        if len(trace_theta) > 3:
            if abs(trace_energy[-1] - trace_energy[-2]) < 1e-6:
                update_progress(
                    1, message="early convergence at {} iterations".format(i))
                break

        if i % 5 == 0 or i == max_iter - 1:
            update_progress((i + 1) / max_iter)

    end = time.time()
    print("  duration: {}".format(
        str(datetime.timedelta(seconds=round(end - start)))))

    if trace:
        return trace_energy, trace_theta
    elif RETURN:
        return theta, i + 1
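
#stochastic_gd above assumes a `batch_iter` helper that is not defined in this
#file. A minimal sketch, assuming it yields shuffled (y_batch, x_batch) pairs
#covering the data once per call; the real helper may differ.
def batch_iter(y, X, batch_size=50, shuffle=True):
    """Yield (y_batch, x_batch) mini-batches of the data."""
    n = len(y)
    indices = np.random.permutation(n) if shuffle else np.arange(n)
    for start in range(0, n, batch_size):
        idx = indices[start:start + batch_size]
        yield y[idx], X[idx]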
def vanilla_gd(model,
               max_iter=10,
               step_size=1e-4,
               initial=None,
               trace=False,
               RETURN=False,
               save=True):

    if initial is None:
        initial = np.random.randn(model.size)
        #set the variance to a positive parameter
        initial[0] = 1

    fun = model.neg_log_posterior
    grad_fun = model.neg_log_posterior_grad
    trace_energy = [fun(initial)]
    trace_theta = [initial]

    start = time.time()
    theta = initial
    for i in range(max_iter):
        #update and append
        theta = theta - step_size * grad_fun(theta)
        trace_theta.append(theta)
        trace_energy.append(fun(theta))

        #convergence check
        if abs(trace_energy[-1] - trace_energy[-2]) < 1e-6:
            update_progress(
                1, message="early convergence at {} iterations".format(i))
            break

        if i % 5 == 0 or i == max_iter - 1:
            update_progress((i + 1) / max_iter)
    end = time.time()
    print("  duration: {}".format(
        str(datetime.timedelta(seconds=round(end - start)))))

    if save:
        model.results["gd"] = theta

    if trace:
        return trace_theta, trace_energy
    elif RETURN:
        return theta, i + 1
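
#Every routine in this file calls `update_progress`, which is defined elsewhere
#in the project. The sketch below is a hypothetical console version, assuming
#the signature used above: a fraction in [0, 1] plus an optional message.
import sys


def update_progress(progress, message=None):
    """Print a simple text progress bar; progress is a fraction in [0, 1]."""
    bar_length = 20
    filled = int(round(bar_length * progress))
    line = "\r  [{0}] {1:3.0f}%".format(
        "#" * filled + "-" * (bar_length - filled), 100 * progress)
    if message is not None:
        line += "  " + message
    sys.stdout.write(line)
    if progress >= 1:
        sys.stdout.write("\n")
    sys.stdout.flush()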
def newton_gd(model, initial=None, max_iter=10, trace=False, RETURN=False):

    #NOTE: this Newton scheme can be numerically unstable and should be revisited if time permits
    import warnings
    warnings.warn("newton_gd is unstable and should be fixed if time permits")
    fun = model.neg_log_posterior
    grad_fun = model.neg_log_posterior_grad
    hes_fun = model.neg_log_posterior_hessian

    if initial is None:
        initial = np.ones(model.size)
        initial[0] = 2

    if trace:
        trace_thetas = [initial]
        trace_energy = [fun(initial)]

    start = time.time()
    theta = initial.copy()
    for i in range(max_iter):
        #Newton direction: solve H v = grad rather than forming the explicit inverse
        v = np.linalg.solve(hes_fun(theta), grad_fun(theta))
        cand = theta - v
        if trace:
            trace_thetas.append(cand)
            trace_energy.append(fun(cand))
        theta = cand
        if i % 5 == 0 or i == max_iter - 1:
            update_progress((i + 1) / max_iter)

    end = time.time()
    print("  duration: {}".format(
        str(datetime.timedelta(seconds=round(end - start)))))

    model.results["Newton_gd"] = theta

    if trace:
        return trace_thetas, trace_energy
    if RETURN:
        return theta
def MH_whithin_Gibbs(model,
                     verbose=False,
                     verbose_gen=True,
                     RETURN=False,
                     **kwargs):
    '''Metropolis within Gibbs: updates a batch of coordinates at a time
    rather than the whole parameter vector.
    Takes the same inputs as the vanilla random-walk sampler (random_walk_MH).
    '''

    #get the size of the parameter to simulate
    size = model.size

    #initial point
    if "initial" in kwargs.keys():
        current = np.array(kwargs["initial"])
    else:
        current = np.ones(size)  #np.random.randn(size)

    #step size
    if 'step_size' in kwargs.keys():
        step_size = kwargs["step_size"]
    else:
        step_size = 0.04
        print("default step size selected : {}".format(step_size))

    #number of iterations
    if 'max_iter' in kwargs.keys():
        max_iter = kwargs["max_iter"]
    else:
        max_iter = 10

    if 'batch' in kwargs.keys():
        batch = kwargs["batch"]
    else:
        batch = 5
        print("default batch is {}".format(batch))

    #create empty containers
    samples = np.zeros([max_iter, size])
    record_acceptance = np.zeros(max_iter)

    #performance measures
    start = time.time()

    #actual MCMC simulation
    for k in range(max_iter):

        #choose an index to update
        indices = np.random.randint(len(current), size=batch)
        #update the sample accordingly
        step = np.zeros_like(current)
        for index in indices:
            step[index] = np.random.randn()

        proposal = current + step_size * step

        #compute its acceptance ratio
        ratio = np.exp(model.log_posterior(proposal) \
                        - model.log_posterior(current))

        #check if accepted
        threshold = np.random.random()
        if ratio > threshold:
            current = proposal

        #update the samples and the acceptance record accordingly
        samples[k, :] = current
        record_acceptance[k] = (ratio > threshold)

        if verbose_gen:
            if k % 5 == 0 or k == max_iter - 1:
                update_progress((k + 1) / max_iter)

    # saving the data
    #defining the burnin parameter
    if "burning" in kwargs.keys():
        burning = kwargs["burning"]
    else:
        burning = int(max_iter / 10)

    #saving the estimates in the model
    ## NOTE: could be generalized if time
    covariance = np.cov(samples[burning:].T)
    model.results["MH_Gibbs_mean"] = [
        np.mean(samples[burning:], axis=0), covariance
    ]

    end = time.time()
    if verbose:
        print(" Acceptance rate : {:2.1%}  (advised values between 10% and 50%)"\
                    .format(np.mean(record_acceptance)))
        print("  duration: {}".format(
            str(datetime.timedelta(seconds=round(end - start)))))

    model.time["MH_Gibbs"] = [end - start, max_iter]
    if RETURN:
        if "acc" in kwargs.keys():
            if kwargs["acc"] == True:
                return samples, np.mean(record_acceptance)
        return samples
def Langevin_MH(model,
                tau,
                verbose=True,
                verbose_gen=True,
                RETURN=False,
                **kwargs):

    size = model.size

    if "initial" in kwargs.keys():
        current = np.array(kwargs["initial"])
    else:
        current = np.ones(size)

    if 'max_iter' in kwargs.keys():
        max_iter = kwargs["max_iter"]
    else:
        max_iter = 10
    if "step_size" in kwargs.keys():
        step_size = kwargs["step_size"]
    else:
        step_size = 1

    samples = np.zeros([max_iter, size])
    record_acceptance = np.zeros(max_iter)

    start = time.time()

    def log_qprop(X, Y, tau, grad):
        #log density (up to an additive constant) of proposing X from Y
        R = -1 / (4 * tau) * np.linalg.norm(X - Y - tau * grad(Y))**2
        return R

    for k in range(max_iter):
        #compute the proposal based on the Langevin (MALA) update
        proposal = current + step_size*(tau*model.log_posterior_grad(current)+\
                     np.sqrt(2*tau)*np.random.randn(size))

        #dirty trick to avoid proposals with a negative variance parameter
        # REVIEW: handle this in the distribution itself, but beware it could break other computations
        if proposal[0] > 0 or model.name\
                    == "Conditional model : Multilogistic,  Prior : gaussian":
            #compute the log acceptance ratio:
            #log pi(proposal) - log pi(current) + log q(current|proposal) - log q(proposal|current)
            ratio = model.log_posterior(proposal) -\
                    model.log_posterior(current)
            ratio += log_qprop(current, proposal, tau,
                               model.log_posterior_grad)
            ratio -= log_qprop(proposal, current, tau,
                               model.log_posterior_grad)
            ratio = np.exp(ratio)
        else:
            #the proposed value is invalid, so it is never accepted
            ratio = 0

        #draw the acceptance threshold outside the branch so it is always defined
        threshold = np.random.random()
        if ratio > threshold:
            current = proposal

        #record the new samples
        samples[k, :] = current
        record_acceptance[k] = (ratio > threshold)

        if verbose_gen:
            if k % 5 == 0 or k == max_iter - 1:
                update_progress((k + 1) / max_iter)
    if "burning" in kwargs.keys():
        burning = kwargs["burning"]
    else:
        burning = int(max_iter / 10)
    covariance = np.cov(samples[burning:].T)
    model.results["MH_Langevin"] = [
        np.mean(samples[burning:], axis=0), covariance
    ]

    if verbose:
        print(" Acceptance rate : {:2.1%}  (advised values between 10% and 50%)"\
                    .format(np.mean(record_acceptance)))

    end = time.time()
    if verbose:
        print("  duration: {}".format(
            str(datetime.timedelta(seconds=round(end - start)))))

    model.time["MH_langevin"] = [end - start, max_iter]
    if RETURN:
        if "acc" in kwargs.keys():
            if kwargs["acc"] == True:
                return samples, np.mean(record_acceptance)
        return samples
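
#For reference, the MALA acceptance ratio used above as a standalone sketch;
#the function name and signature are illustrative, not part of the project API:
#log alpha = log pi(x') - log pi(x) + log q(x | x') - log q(x' | x)
def mala_log_acceptance(log_post, log_post_grad, current, proposal, tau):
    """Log of the Metropolis-adjusted Langevin acceptance ratio (illustrative)."""

    def log_q(x, y):
        #log density (up to a constant) of proposing x from y
        return -np.linalg.norm(x - y - tau * log_post_grad(y))**2 / (4 * tau)

    return (log_post(proposal) - log_post(current)
            + log_q(current, proposal) - log_q(proposal, current))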
def random_walk_MH(model,
                   verbose=False,
                   verbose_gen=True,
                   RETURN=False,
                   **kwargs):
    """Metropolis Hastings sampling algortihm.

    Parameters
    ----------
    step_size : float
        step_size of the Markov Chain (the default is 0.05).
    max_iter : type
        maximum of iterations of the algortihm (the default is 100).
    verbose: bool
        to print the acceptance rate
    **kwargs : type
        size: float
            size of the beta parameter of the log_posterior_function
        initial: ndarray
            starting point of the algorithm (optional)
        acc: bool
            to return the Acceptance rate of the Markov Chain


    Returns
    -------
    nd_array
        simulated samples from the post of model of size max_iter

    Examples
    -------
    >>> gaussian_model = Model(Gaussian_prior,Gaussian, ... )
    >>> samples = MH_sampling(gaussian_model, max_iter = 300 ,
                                step_size = 0.06, size = 17)

    """

    #get the size of the parameter to simulate
    size = model.size

    #initial point
    if "initial" in kwargs.keys():
        current = np.array(kwargs["initial"])
    else:
        current = np.ones(size)  #np.random.randn(size)

    #step size
    if 'step_size' in kwargs.keys():
        step_size = kwargs["step_size"]
    else:
        step_size = 0.04
        print("default step size selected : {}".format(step_size))

    #number of iterations
    if 'max_iter' in kwargs.keys():
        max_iter = kwargs["max_iter"]
    else:
        max_iter = 10

    #create empty containers
    samples = np.zeros([max_iter, size])
    record_acceptance = np.zeros(max_iter)

    #performance measures
    start = time.time()

    #actual MCMC simulation
    for k in range(max_iter):

        #update the current sample
        proposal = current + step_size * np.random.randn(size)

        #compute its acceptance ratio
        ratio = np.exp(model.log_posterior(proposal) \
                        - model.log_posterior(current))

        #check if accepted
        threshold = np.random.random()
        if ratio > threshold:
            current = proposal

        #update the samples and the acceptance record accordingly
        samples[k, :] = current
        record_acceptance[k] = (ratio > threshold)

        if verbose_gen:
            if k % 5 == 0 or k == max_iter - 1:
                update_progress((k + 1) / max_iter)

    # saving the data
    #defining the burnin parameter
    if "burning" in kwargs.keys():
        burning = kwargs["burning"]
    else:
        burning = int(max_iter / 10)

    #saving the estimates in the model
    ## NOTE: could be generalized if time
    covariance = np.cov(samples[burning:].T)
    model.results["MH_vanilla"] = [
        np.mean(samples[burning:], axis=0), covariance
    ]

    end = time.time()
    if verbose:
        print(" Acceptance rate : {:2.1%}  (advised values between 10% and 50%)"\
                    .format(np.mean(record_acceptance)))
        print("  duration: {}".format(
            str(datetime.timedelta(seconds=round(end - start)))))

    model.time["MH_vanilla"] = [end - start, max_iter]
    if RETURN:
        if "acc" in kwargs.keys():
            if kwargs["acc"] == True:
                return samples, np.mean(record_acceptance)
        return samples
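
#A toy end-to-end run of random_walk_MH. The ToyGaussian class below is purely
#illustrative (a standard Gaussian posterior) and only implements the attributes
#the samplers above actually touch; it is not part of the project.
class ToyGaussian:
    """Hypothetical model with the minimal interface used by the samplers."""

    def __init__(self, size=3):
        self.size = size
        self.results = {}
        self.time = {}
        self.name = "toy standard Gaussian"

    def log_posterior(self, theta):
        return -0.5 * np.dot(theta, theta)

    def log_posterior_grad(self, theta):
        return -np.asarray(theta)


#example usage (illustrative):
#toy = ToyGaussian(size=3)
#chain = random_walk_MH(toy, RETURN=True, max_iter=500, step_size=0.5)
#posterior_mean, posterior_cov = toy.results["MH_vanilla"]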
def line_search_gd(model,
                   x0=None,
                   lambda_=1e-4,
                   alpha=0.2,
                   beta=0.5,
                   max_iter=20,
                   epsilon=1e-4,
                   trace=False,
                   RETURN=False,
                   save=True):

    f = model.neg_log_posterior
    df = model.neg_log_posterior_grad
    if x0 is None:
        x0 = np.random.randn(model.size)
        x0[0:2] = 1
    values = np.asarray(x0).reshape(1, -1)
    energies = [f(x0)]

    old = x0
    start = time.time()
    for i in range(max_iter):

        gradient = df(old)
        step = lambda_

        candidate = old - step * gradient

        #safety counter for the backtracking loop
        j = 0

        #backtracking line search with the Armijo sufficient-decrease criterion
        while f(candidate) > f(old) - step * alpha * np.linalg.norm(gradient)**2:
            step *= beta
            candidate = old - step * gradient
            j += 1
            if j > 100:
                print("more than 100 iterations to adjust the step size")
                break

        #check for early convergence
        if abs(f(candidate) - f(old)) < 1e-6:
            old = candidate
            update_progress(
                1, message="early convergence at {} iterations".format(i))
            break

        values = np.concatenate((values, candidate.reshape(1, len(candidate))))
        energies.append(f(candidate))

        old = candidate
        if i % 5 == 0 or i == max_iter - 1:
            update_progress((i + 1) / max_iter)

    end = time.time()
    print("  duration: {}".format(
        str(datetime.timedelta(seconds=round(end - start)))))
    if save:
        model.results["line_search_gd"] = old

    if trace:
        return values, energies
    if RETURN:
        return old, i + 1
def Wolfe_cond_gd(model,
                  lambda_0=None,
                  initial=None,
                  max_iter=10,
                  trace=False,
                  RETURN=False,
                  save=True,
                  c1=1e-4,
                  c2=0.9,
                  beta_C1=0.9,
                  beta_C2=1.1):

    if initial is None:
        initial = np.ones(model.size)
    if lambda_0 is None:
        lambda_0 = 1e-3

    # trace for visualization purposes

    trace_theta = [initial]
    trace_lambdas = [lambda_0]
    trace_energy = [model.neg_log_posterior(initial)]

    theta = initial.copy()
    step_size = lambda_0

    start = time.time()

    for i in range(max_iter):
        #check wolfe condition
        proposal = theta - step_size * model.neg_log_posterior_grad(theta)
        checks = check_wolfe_conditions(model.neg_log_posterior,
                                        model.neg_log_posterior_grad, theta,
                                        step_size, c1, c2)
        #accept the proposal
        if checks[0] and checks[1]:
            trace_energy.append(model.neg_log_posterior(proposal))
            trace_theta.append(proposal)
            trace_lambdas.append(step_size)

            #convergence check
            if abs(
                    model.neg_log_posterior(theta) -
                    model.neg_log_posterior(proposal)) < 1e-6:
                update_progress(
                    1, message="early convergence at {} iterations".format(i))
                theta = proposal
                break

            theta = proposal

        #update the step size according to the Wolfe conditions
        elif checks[0]:
            step_size *= beta_C1
        elif checks[1]:
            step_size *= beta_C2
        else:
            raise RuntimeError(
                "both Wolfe conditions failed; the gradient is probably wrong")

        if i % 5 == 0 or i == max_iter - 1:
            update_progress((i + 1) / max_iter)
    end = time.time()
    print("  duration: {}".format(
        str(datetime.timedelta(seconds=round(end - start)))))
    if save:
        model.results["Wolfe_cond_gd"] = theta

    if RETURN:
        if trace:
            return trace_theta, trace_energy, trace_lambdas
        return theta, i + 1
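
#Wolfe_cond_gd assumes a `check_wolfe_conditions` helper defined elsewhere in
#the project. The sketch below is one plausible version: it returns a pair of
#booleans (sufficient decrease, curvature) for a steepest-descent step of
#length step_size; the actual helper, and the order of the pair, may differ.
def check_wolfe_conditions(f, grad_f, theta, step_size, c1=1e-4, c2=0.9):
    """Return (armijo_ok, curvature_ok) for the step theta - step_size * grad_f(theta)."""
    g = grad_f(theta)
    direction = -g  #steepest-descent direction
    proposal = theta + step_size * direction
    slope = np.dot(g, direction)  #directional derivative, negative for descent
    armijo_ok = f(proposal) <= f(theta) + c1 * step_size * slope
    curvature_ok = np.dot(grad_f(proposal), direction) >= c2 * slope
    return armijo_ok, curvature_ok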