def GVA(model, max_steps=10, mu=None, L=None, nb_samples=100):
    """Run stochastic gradient steps on a Gaussian variational approximation.

    At every step ``nb_samples`` points are drawn as
    ``mu + (mat.exp(L) @ eta).T`` with ``eta`` standard normal, the model's
    negative log-posterior gradient and Hessian are averaged over those
    points, and ``mu`` and ``L`` are moved along the resulting directions.

    Parameters
    ----------
    model : object
        Must expose ``size``, ``neg_log_posterior_grad(theta)`` and
        ``neg_log_posterior_hessian(theta)``.
    max_steps : int
        Number of update steps.
    mu : array_like, optional
        Starting mean (default: zeros of length ``model.size``). The input is
        copied, never modified in place.
    L : array_like, optional
        Starting matrix parameter (default: ``2 * np.eye(model.size)``). The
        input is copied, never modified in place.
    nb_samples : int
        Number of Monte Carlo draws per step; must be at least 2.

    Returns
    -------
    tuple
        ``(mu, L)`` after ``max_steps`` updates, as float arrays.

    Raises
    ------
    ValueError
        If ``nb_samples`` is smaller than 2.
    """
    if nb_samples < 2:
        # The update is scaled by nb_samples - 1 (see below); fewer than two
        # samples would divide by zero and silently fill mu/L with nan/inf.
        raise ValueError(
            "nb_samples must be at least 2, got {}".format(nb_samples))

    size = model.size
    # Work on float copies so the caller's arrays are never updated in place
    # and integer inputs do not break the in-place float updates below.
    mu = np.zeros(size) if mu is None else np.array(mu, dtype=float)
    L = 2 * np.eye(size) if L is None else np.array(L, dtype=float)

    # NOTE(review): the original code divided both updates by the last index
    # of the inner sample loop, i.e. nb_samples - 1, on top of the averaging
    # over nb_samples. The scaling is kept as-is to preserve the numerical
    # behaviour; confirm whether a decaying step size was intended instead.
    update_divisor = nb_samples - 1
    identity = np.eye(len(mu))

    for step in range(max_steps):
        eta = np.random.randn(size, nb_samples)
        # `mat` is a helper module defined elsewhere; presumably mat.exp is a
        # matrix exponential and mat.sym a symmetrisation -- TODO confirm.
        draws = mu + np.transpose(mat.exp(L) @ eta)
        # Does not depend on the individual draw: compute once per step.
        exp_2L = mat.exp(2 * L)

        elbo_grad_mu = 0
        elbo_grad_L = 0
        for draw in draws:
            elbo_grad_mu -= model.neg_log_posterior_grad(draw) / nb_samples
            elbo_grad_L += exp_2L @ model.neg_log_posterior_hessian(draw)
        elbo_grad_L = -mat.sym(elbo_grad_L / nb_samples) + identity

        mu += 1e-3 * elbo_grad_mu / update_divisor
        L += 1e-3 * elbo_grad_L / update_divisor
        update_progress((step + 1) / max_steps)

    return mu, L
def stochastic_gd(model,
                  step_size=1e-4,
                  max_iter=100,
                  trace=False,
                  RETURN=False,
                  save=True):
    """Minimise the negative log posterior with mini-batch gradient steps.

    The start point is drawn uniformly in ``[0, 1)`` with the first
    coordinate shifted by one. Each outer iteration walks over the batches
    produced by ``batch_iter(y, X, batch_size=50)`` and takes one gradient
    step per batch; every step is recorded in the trace together with the
    full negative log posterior at the new point.

    Parameters
    ----------
    model : object
        Must expose ``data``, ``response``, ``size``,
        ``neg_log_posterior(theta)`` and
        ``neg_log_posterior_grad(theta, X=..., y=...)``.
    step_size : float
        Gradient step length.
    max_iter : int
        Maximum number of outer iterations.
    trace : bool
        If true, return ``(trace_energy, trace_theta)``.
    RETURN : bool
        If true (and ``trace`` is false), return ``(theta, n_iterations)``.
    save : bool
        Accepted for signature compatibility with the other optimisers;
        this function does not store anything in ``model.results``.

    Returns
    -------
    tuple or None
        See ``trace`` and ``RETURN``; ``None`` when both are false.
    """
    X = model.data.copy()
    y = model.response.copy()
    fun = model.neg_log_posterior
    grad_fun = model.neg_log_posterior_grad
    batch_size = 50

    theta = np.random.rand(model.size)
    # dirty fix for the variance: shift the first coordinate to [1, 2)
    theta[0] += 1

    trace_theta = []
    trace_energy = []
    # Tracked separately from the loop index so that max_iter == 0 returns 0
    # instead of failing on an undefined loop variable.
    n_iter = 0
    start = time.time()
    for i in range(max_iter):
        n_iter = i + 1
        # compute the gradient on only a subset of the data points
        for y_batch, x_batch in batch_iter(y, X, batch_size=batch_size):
            theta = theta - step_size * grad_fun(theta, X=x_batch, y=y_batch)
            trace_theta.append(theta)
            trace_energy.append(fun(theta))

        # Convergence check on the last two recorded energies; they are the
        # values the posterior takes at theta and at the previous trace
        # entry, so there is no need to evaluate it again.
        if len(trace_theta) > 3:
            if abs(trace_energy[-1] - trace_energy[-2]) < 1e-6:
                update_progress(
                    1, message="early convergence at {} iterations".format(i))
                break

        if i % 5 == 0 or i == max_iter - 1:
            update_progress((i + 1) / max_iter)
    end = time.time()
    print(" duration: {}".format(
        str(datetime.timedelta(seconds=round(end - start)))))

    if trace:
        # NOTE: energies come first here, unlike vanilla_gd.
        return trace_energy, trace_theta
    elif RETURN:
        return theta, n_iter
def vanilla_gd(model,
               max_iter=10,
               step_size=1e-4,
               initial=None,
               trace=False,
               RETURN=False,
               save=True):
    """Minimise the negative log posterior with fixed-step gradient descent.

    Parameters
    ----------
    model : object
        Must expose ``size``, ``neg_log_posterior(theta)``,
        ``neg_log_posterior_grad(theta)`` and a ``results`` dict.
    max_iter : int
        Maximum number of gradient steps.
    step_size : float
        Gradient step length.
    initial : ndarray, optional
        Starting point. When omitted, a standard normal draw is used with its
        first coordinate set to 1.
    trace : bool
        If true, return ``(trace_theta, trace_energy)``; both lists include
        the starting point.
    RETURN : bool
        If true (and ``trace`` is false), return ``(theta, n_iterations)``.
    save : bool
        If true, store the final point in ``model.results["gd"]``.

    Returns
    -------
    tuple or None
        See ``trace`` and ``RETURN``; ``None`` when both are false.
    """
    if initial is None:
        initial = np.random.randn(model.size)
        # set the variance to a positive parameter
        initial[0] = 1

    fun = model.neg_log_posterior
    grad_fun = model.neg_log_posterior_grad

    theta = initial
    energy = fun(theta)
    trace_theta = [theta]
    trace_energy = [energy]
    # Tracked separately from the loop index so that max_iter == 0 returns 0
    # instead of failing on an undefined loop variable.
    n_iter = 0
    start = time.time()
    for i in range(max_iter):
        n_iter = i + 1
        theta = theta - step_size * grad_fun(theta)
        # Keep the previous energy around: the convergence test needs both
        # values and the posterior only has to be evaluated once per step.
        previous_energy, energy = energy, fun(theta)
        trace_theta.append(theta)
        trace_energy.append(energy)

        if abs(energy - previous_energy) < 1e-6:
            update_progress(
                1, message="early convergence at {} iterations".format(i))
            break

        if i % 5 == 0 or i == max_iter - 1:
            update_progress((i + 1) / max_iter)
    end = time.time()
    print(" duration: {}".format(
        str(datetime.timedelta(seconds=round(end - start)))))

    if save:
        model.results["gd"] = theta
    if trace:
        return trace_theta, trace_energy
    elif RETURN:
        return theta, n_iter
def newton_gd(model, initial=None, max_iter=10, trace=False, RETURN=False):
    """Minimise the negative log posterior with plain Newton steps.

    The method is known to be unstable; a warning is emitted on every call.

    Parameters
    ----------
    model : object
        Must expose ``size``, ``neg_log_posterior(theta)``,
        ``neg_log_posterior_grad(theta)``,
        ``neg_log_posterior_hessian(theta)`` and a ``results`` dict.
    initial : ndarray, optional
        Starting point (default: a vector of ones with first coordinate 2).
    max_iter : int
        Number of Newton steps; there is no early stopping.
    trace : bool
        If true, return ``(trace_thetas, trace_energy)``, both including the
        starting point.
    RETURN : bool
        If true (and ``trace`` is false), return the final point.

    Returns
    -------
    tuple, ndarray or None
        See ``trace`` and ``RETURN``; ``None`` when both are false.

    Notes
    -----
    The final point is always stored in ``model.results["Newton_gd"]``.
    """
    import warnings

    # This used to be `raise Warning(...)`, which aborted every call and left
    # the rest of the function unreachable. Emit a real warning instead.
    warnings.warn("unstable method, should be fixed if time", stacklevel=2)

    fun = model.neg_log_posterior
    grad_fun = model.neg_log_posterior_grad
    hes_fun = model.neg_log_posterior_hessian

    if initial is None:
        initial = np.ones(model.size)
        initial[0] = 2

    if trace:
        trace_thetas = [initial]
        trace_energy = [fun(initial)]

    start = time.time()
    theta = initial.copy()
    for i in range(max_iter):
        # Solve H v = g rather than forming the explicit inverse: cheaper and
        # numerically better behaved.
        newton_step = np.linalg.solve(hes_fun(theta), grad_fun(theta))
        theta = theta - newton_step
        if trace:
            trace_thetas.append(theta)
            trace_energy.append(fun(theta))
        if i % 5 == 0 or i == max_iter - 1:
            update_progress((i + 1) / max_iter)
    end = time.time()
    print(" duration: {}".format(
        str(datetime.timedelta(seconds=round(end - start)))))

    model.results["Newton_gd"] = theta
    if trace:
        return trace_thetas, trace_energy
    if RETURN:
        return theta
def MH_whithin_Gibbs(model,
                     verbose=False,
                     verbose_gen=True,
                     RETURN=False,
                     **kwargs):
    """Metropolis-within-Gibbs sampler.

    Each iteration perturbs a random batch of coordinates (drawn with
    replacement) instead of the whole parameter vector, then accepts or
    rejects the move with the usual Metropolis rule.

    Parameters
    ----------
    model : object
        Must expose ``size``, ``log_posterior(theta)`` and the dicts
        ``results`` and ``time``.
    verbose : bool
        Print the acceptance rate and the run time.
    verbose_gen : bool
        Report progress through ``update_progress``.
    RETURN : bool
        Return the samples (see Returns).
    **kwargs
        initial : array_like
            Starting point (default: vector of ones). Converted to float.
        step_size : float
            Scale of the perturbation (default 0.04, announced on stdout).
        max_iter : int
            Number of iterations (default 10).
        batch : int
            Number of coordinate indices drawn per iteration (default 5,
            announced on stdout).
        burning : int
            Number of leading samples dropped before computing the summary
            statistics (default ``int(max_iter / 10)``).
        acc : bool
            If true and ``RETURN`` is true, also return the acceptance rate.

    Returns
    -------
    ndarray or tuple or None
        ``samples`` of shape ``(max_iter, size)``, or
        ``(samples, acceptance_rate)`` when ``acc`` is true; ``None`` when
        ``RETURN`` is false.

    Notes
    -----
    Stores ``[mean, covariance]`` of the post-burn-in samples in
    ``model.results["MH_Gibbs_mean"]`` and ``[duration, max_iter]`` in
    ``model.time["MH_Gibbs"]``.
    """
    size = model.size

    if "initial" in kwargs:
        # Force float: with an integer start the perturbation vector below
        # would be an integer array and every random step would truncate to 0.
        current = np.array(kwargs["initial"], dtype=float)
    else:
        current = np.ones(size)

    if "step_size" in kwargs:
        step_size = kwargs["step_size"]
    else:
        step_size = 0.04
        print("default step size selected : {}".format(step_size))

    max_iter = kwargs.get("max_iter", 10)

    if "batch" in kwargs:
        batch = kwargs["batch"]
    else:
        batch = 5
        print("default batch is {}".format(batch))

    samples = np.zeros([max_iter, size])
    record_acceptance = np.zeros(max_iter)

    start = time.time()

    # The log posterior of the current state only changes on acceptance, so
    # it is cached instead of being recomputed at every iteration.
    current_lp = model.log_posterior(current)
    for k in range(max_iter):
        # choose the coordinates to update
        indices = np.random.randint(len(current), size=batch)
        step = np.zeros_like(current)
        for index in indices:
            step[index] = np.random.randn()
        proposal = current + step_size * step

        proposal_lp = model.log_posterior(proposal)
        ratio = np.exp(proposal_lp - current_lp)
        threshold = np.random.random()
        accepted = ratio > threshold
        if accepted:
            current, current_lp = proposal, proposal_lp

        samples[k, :] = current
        record_acceptance[k] = accepted

        if verbose_gen and (k % 5 == 0 or k == max_iter - 1):
            update_progress((k + 1) / max_iter)

    # summary statistics on the samples kept after burn-in
    ## NOTE: could be generalized if time
    burning = kwargs.get("burning", int(max_iter / 10))
    covariance = np.cov(samples[burning:].T)
    model.results["MH_Gibbs_mean"] = [
        np.mean(samples[burning:], axis=0), covariance
    ]

    end = time.time()
    if verbose:
        print(" Acceptance rate : {:2.1%} "
              "(advised values between 10% and 50%)".format(
                  np.mean(record_acceptance)))
        print(" duration: {}".format(
            str(datetime.timedelta(seconds=round(end - start)))))
    model.time["MH_Gibbs"] = [end - start, max_iter]

    if RETURN:
        if kwargs.get("acc"):
            return samples, np.mean(record_acceptance)
        return samples
def Langevin_MH(model,
                tau,
                verbose=True,
                verbose_gen=True,
                RETURN=False,
                **kwargs):
    """Metropolis-adjusted Langevin sampler.

    Proposals follow
    ``current + step_size * (tau * grad + sqrt(2 * tau) * noise)`` where
    ``grad`` is the gradient of the log posterior at ``current`` and
    ``noise`` is standard normal. A proposal whose first coordinate is not
    strictly positive is rejected outright, except for the model named
    "Conditional model : Multilogistic, Prior : gaussian".

    Parameters
    ----------
    model : object
        Must expose ``size``, ``name``, ``log_posterior(theta)``,
        ``log_posterior_grad(theta)`` and the dicts ``results`` and ``time``.
    tau : float
        Langevin discretisation step.
    verbose : bool
        Print the acceptance rate and the run time.
    verbose_gen : bool
        Report progress through ``update_progress``.
    RETURN : bool
        Return the samples (see Returns).
    **kwargs
        initial : array_like
            Starting point (default: vector of ones).
        max_iter : int
            Number of iterations (default 10).
        step_size : float
            Extra scaling applied to the whole Langevin move (default 1).
        burning : int
            Number of leading samples dropped before computing the summary
            statistics (default ``int(max_iter / 10)``).
        acc : bool
            If true and ``RETURN`` is true, also return the acceptance rate.

    Returns
    -------
    ndarray or tuple or None
        ``samples`` of shape ``(max_iter, size)``, or
        ``(samples, acceptance_rate)`` when ``acc`` is true; ``None`` when
        ``RETURN`` is false.

    Notes
    -----
    Stores ``[mean, covariance]`` of the post-burn-in samples in
    ``model.results["MH_Langevin"]`` and ``[duration, max_iter]`` in
    ``model.time["MH_langevin"]``.
    """
    size = model.size
    if "initial" in kwargs:
        current = np.array(kwargs["initial"])
    else:
        current = np.ones(size)
    max_iter = kwargs.get("max_iter", 10)
    step_size = kwargs.get("step_size", 1)

    samples = np.zeros([max_iter, size])
    record_acceptance = np.zeros(max_iter)
    start = time.time()

    log_post = model.log_posterior
    grad = model.log_posterior_grad

    def log_qprop(X, Y, grad_Y):
        """Log density, up to a constant, of proposing X when standing at Y.

        The move is Gaussian with mean ``Y + step_size * tau * grad_Y`` and
        variance ``2 * tau * step_size ** 2`` per coordinate, so the extra
        ``step_size`` scaling has to appear here as well. With the default
        ``step_size = 1`` this is the usual ``-|X - Y - tau grad|^2 / (4 tau)``.
        """
        drift = step_size * tau * grad_Y
        return (-np.linalg.norm(X - Y - drift)**2 /
                (4 * tau * step_size**2))

    # Log posterior and gradient of the current state only change on
    # acceptance, so they are cached across iterations.
    current_lp = log_post(current)
    current_grad = grad(current)
    for k in range(max_iter):
        # proposal based on the Langevin update
        proposal = current + step_size * (
            tau * current_grad + np.sqrt(2 * tau) * np.random.randn(size))

        # An invalid proposal is simply rejected. The acceptance flag is
        # initialised here so that it is always defined, including when the
        # very first proposal is invalid.
        accepted = False

        # dirty trick to avoid proposition for negative variance
        # REVIEW: code it in distribution, but beware it could break down
        # other computation
        if proposal[0] > 0 or model.name\
         == "Conditional model : Multilogistic, Prior : gaussian":
            proposal_lp = log_post(proposal)
            proposal_grad = grad(proposal)
            # Metropolis-Hastings log ratio:
            #   log pi(proposal) - log pi(current)
            #   + log q(current | proposal)   (reverse move)
            #   - log q(proposal | current)   (forward move)
            log_ratio = proposal_lp - current_lp
            log_ratio += log_qprop(current, proposal, proposal_grad)
            log_ratio -= log_qprop(proposal, current, current_grad)
            accepted = np.exp(log_ratio) > np.random.random()
            if accepted:
                current = proposal
                current_lp = proposal_lp
                current_grad = proposal_grad

        samples[k, :] = current
        record_acceptance[k] = accepted

        if verbose_gen and (k % 5 == 0 or k == max_iter - 1):
            update_progress((k + 1) / max_iter)

    burning = kwargs.get("burning", int(max_iter / 10))
    covariance = np.cov(samples[burning:].T)
    model.results["MH_Langevin"] = [
        np.mean(samples[burning:], axis=0), covariance
    ]

    if verbose:
        print(" Acceptance rate : {:2.1%} "
              "(advised values between 10% and 50%)".format(
                  np.mean(record_acceptance)))
    end = time.time()
    if verbose:
        print(" duration: {}".format(
            str(datetime.timedelta(seconds=round(end - start)))))
    model.time["MH_langevin"] = [end - start, max_iter]

    if RETURN:
        if kwargs.get("acc"):
            return samples, np.mean(record_acceptance)
        return samples
def random_walk_MH(model,
                   verbose=False,
                   verbose_gen=True,
                   RETURN=False,
                   **kwargs):
    """Random-walk Metropolis-Hastings sampling algorithm.

    Every iteration proposes ``current + step_size * noise`` with standard
    normal ``noise`` on all coordinates and accepts it with probability
    ``min(1, exp(log_posterior(proposal) - log_posterior(current)))``.

    Parameters
    ----------
    model : object
        Must expose ``size``, ``log_posterior(theta)`` and the dicts
        ``results`` and ``time``.
    verbose : bool
        Print the acceptance rate and the run time.
    verbose_gen : bool
        Report progress through ``update_progress``.
    RETURN : bool
        Return the samples (see Returns).
    **kwargs
        initial : array_like
            Starting point of the chain (default: vector of ones).
        step_size : float
            Scale of the random-walk proposal (default 0.04, announced on
            stdout).
        max_iter : int
            Number of iterations (default 10).
        burning : int
            Number of leading samples dropped before computing the summary
            statistics (default ``int(max_iter / 10)``).
        acc : bool
            If true and ``RETURN`` is true, also return the acceptance rate.

    Returns
    -------
    ndarray or tuple or None
        ``samples`` of shape ``(max_iter, size)``, or
        ``(samples, acceptance_rate)`` when ``acc`` is true; ``None`` when
        ``RETURN`` is false.

    Notes
    -----
    Stores ``[mean, covariance]`` of the post-burn-in samples in
    ``model.results["MH_vanilla"]`` and ``[duration, max_iter]`` in
    ``model.time["MH_vanilla"]``.
    """
    size = model.size

    if "initial" in kwargs:
        current = np.array(kwargs["initial"])
    else:
        current = np.ones(size)

    if "step_size" in kwargs:
        step_size = kwargs["step_size"]
    else:
        step_size = 0.04
        print("default step size selected : {}".format(step_size))

    max_iter = kwargs.get("max_iter", 10)

    samples = np.zeros([max_iter, size])
    record_acceptance = np.zeros(max_iter)

    start = time.time()

    # The log posterior of the current state only changes on acceptance, so
    # it is cached instead of being recomputed at every iteration.
    current_lp = model.log_posterior(current)
    for k in range(max_iter):
        proposal = current + step_size * np.random.randn(size)

        proposal_lp = model.log_posterior(proposal)
        ratio = np.exp(proposal_lp - current_lp)
        threshold = np.random.random()
        accepted = ratio > threshold
        if accepted:
            current, current_lp = proposal, proposal_lp

        samples[k, :] = current
        record_acceptance[k] = accepted

        if verbose_gen and (k % 5 == 0 or k == max_iter - 1):
            update_progress((k + 1) / max_iter)

    # summary statistics on the samples kept after burn-in
    ## NOTE: could be generalized if time
    burning = kwargs.get("burning", int(max_iter / 10))
    covariance = np.cov(samples[burning:].T)
    model.results["MH_vanilla"] = [
        np.mean(samples[burning:], axis=0), covariance
    ]

    end = time.time()
    if verbose:
        print(" Acceptance rate : {:2.1%} "
              "(advised values between 10% and 50%)".format(
                  np.mean(record_acceptance)))
        print(" duration: {}".format(
            str(datetime.timedelta(seconds=round(end - start)))))
    model.time["MH_vanilla"] = [end - start, max_iter]

    if RETURN:
        if kwargs.get("acc"):
            return samples, np.mean(record_acceptance)
        return samples
def line_search_gd(model,
                   x0=None,
                   lambda_=1e-4,
                   alpha=0.2,
                   beta=0.5,
                   max_iter=20,
                   epsilon=1e-4,
                   trace=False,
                   RETURN=False,
                   save=True):
    """Gradient descent with a backtracking line search.

    At each iteration the step starts at ``lambda_`` and is multiplied by
    ``beta`` until the candidate satisfies the sufficient-decrease test
    ``f(candidate) <= f(old) - step * alpha * |gradient|^2`` (at most 101
    shrinks). The run stops early when the energy changes by less than 1e-6.

    Parameters
    ----------
    model : object
        Must expose ``size``, ``neg_log_posterior(theta)``,
        ``neg_log_posterior_grad(theta)`` and a ``results`` dict.
    x0 : ndarray, optional
        Starting point. When omitted, a standard normal draw is used with
        its first two coordinates set to 1.
    lambda_ : float
        Initial step size tried at every iteration.
    alpha : float
        Sufficient-decrease constant.
    beta : float
        Shrink factor applied to the step size.
    max_iter : int
        Maximum number of iterations.
    epsilon : float
        Currently unused; the convergence tolerance is fixed at 1e-6.
    trace : bool
        If true, return ``(values, energies)`` where ``values`` is an array
        with one visited point per row (starting point included).
    RETURN : bool
        If true (and ``trace`` is false), return ``(theta, n_iterations)``.
    save : bool
        If true, store the final point in ``model.results["line_search_gd"]``.

    Returns
    -------
    tuple or None
        See ``trace`` and ``RETURN``; ``None`` when both are false.
    """
    f = model.neg_log_posterior
    df = model.neg_log_posterior_grad
    if x0 is None:
        x0 = np.random.randn(model.size)
        x0[0:2] = 1

    old = x0
    f_old = f(old)
    # Collected in a list and stacked once at the end; concatenating arrays
    # inside the loop copies the whole history at every iteration.
    values = [old]
    energies = [f_old]
    # Tracked separately from the loop index so that max_iter == 0 returns 0
    # instead of failing on an undefined loop variable.
    n_iter = 0
    start = time.time()
    for i in range(max_iter):
        n_iter = i + 1
        gradient = df(old)
        grad_sq_norm = np.linalg.norm(gradient)**2

        step = lambda_
        candidate = old - step * gradient
        f_candidate = f(candidate)
        # security measure: bound the number of step-size reductions
        shrinks = 0
        while f_candidate > f_old - step * alpha * grad_sq_norm:
            step *= beta
            candidate = old - step * gradient
            f_candidate = f(candidate)
            shrinks += 1
            if shrinks > 100:
                print("more than 100 iterations to adjust the step size")
                break

        # early convergence: the candidate is not recorded in that case
        if abs(f_candidate - f_old) < 1e-6:
            update_progress(
                1, message="early convergence at {} iterations".format(i))
            break

        values.append(candidate)
        energies.append(f_candidate)
        old, f_old = candidate, f_candidate

        if i % 5 == 0 or i == max_iter - 1:
            update_progress((i + 1) / max_iter)
    end = time.time()
    print(" duration: {}".format(
        str(datetime.timedelta(seconds=round(end - start)))))

    if save:
        model.results["line_search_gd"] = old
    if trace:
        return np.array(values), energies
    if RETURN:
        return old, n_iter
def Wolfe_cond_gd(model,
                  lambda_0=None,
                  initial=None,
                  max_iter=10,
                  trace=False,
                  RETURN=False,
                  save=True,
                  c1=1e-4,
                  c2=0.9,
                  beta_C1=0.9,
                  beta_C2=1.1):
    """Gradient descent whose step size is adapted with the Wolfe conditions.

    Each iteration builds a gradient-step proposal and asks
    ``check_wolfe_conditions`` for two flags. When both hold the proposal is
    accepted; when only one holds the step size is rescaled and the current
    point is kept; when neither holds a ``RuntimeError`` is raised.

    Parameters
    ----------
    model : object
        Must expose ``size``, ``neg_log_posterior(theta)``,
        ``neg_log_posterior_grad(theta)`` and a ``results`` dict.
    lambda_0 : float, optional
        Initial step size (default 1e-3).
    initial : ndarray, optional
        Starting point (default: vector of ones).
    max_iter : int
        Maximum number of iterations (accepted or not).
    trace : bool
        With ``RETURN`` true, return
        ``(trace_theta, trace_energy, trace_lambdas)`` instead of the final
        point. Ignored when ``RETURN`` is false.
    RETURN : bool
        If true, return a result (see Returns).
    save : bool
        If true, store the final point in ``model.results["Wolfe_cond_gd"]``.
    c1, c2 : float
        Constants forwarded to ``check_wolfe_conditions``.
    beta_C1 : float
        Factor applied to the step size when only the first flag holds.
    beta_C2 : float
        Factor applied to the step size when only the second flag holds.

    Returns
    -------
    tuple or None
        ``(theta, n_iterations)``, or the three traces when ``trace`` is
        true; ``None`` when ``RETURN`` is false.

    Raises
    ------
    RuntimeError
        If both flags returned by ``check_wolfe_conditions`` are false.
    """
    fun = model.neg_log_posterior
    grad_fun = model.neg_log_posterior_grad

    if initial is None:
        initial = np.ones(model.size)
    if lambda_0 is None:
        lambda_0 = 1e-3

    # Energy of the current point, cached so the convergence test does not
    # have to evaluate the posterior a second time.
    energy = fun(initial)

    # trace for vizualization purpose
    trace_theta = [initial]
    trace_lambdas = [lambda_0]
    trace_energy = [energy]

    theta = initial.copy()
    step_size = lambda_0
    # Tracked separately from the loop index so that max_iter == 0 returns 0
    # instead of failing on an undefined loop variable.
    n_iter = 0
    start = time.time()
    for i in range(max_iter):
        n_iter = i + 1
        proposal = theta - step_size * grad_fun(theta)
        checks = check_wolfe_conditions(fun, grad_fun, theta, step_size, c1,
                                        c2)

        if checks[0] and checks[1]:
            # accept the proposal
            proposal_energy = fun(proposal)
            trace_energy.append(proposal_energy)
            trace_theta.append(proposal)
            trace_lambdas.append(step_size)

            converged = abs(energy - proposal_energy) < 1e-6
            theta, energy = proposal, proposal_energy
            if converged:
                update_progress(
                    1, message="early convergence at {} iterations".format(i))
                break
        # NOTE(review): if checks[0] is the sufficient-decrease condition and
        # checks[1] the curvature condition, shrinking when only the first
        # holds and growing when only the second holds looks inverted --
        # confirm against check_wolfe_conditions before changing anything.
        elif checks[0]:
            step_size *= beta_C1
        elif checks[1]:
            step_size *= beta_C2
        else:
            raise RuntimeError(
                "Wolfe conditions both wrong, gradient must be wrong")

        if i % 5 == 0 or i == max_iter - 1:
            update_progress((i + 1) / max_iter)
    end = time.time()
    print(" duration: {}".format(
        str(datetime.timedelta(seconds=round(end - start)))))

    if save:
        model.results["Wolfe_cond_gd"] = theta
    if RETURN:
        if trace:
            return trace_theta, trace_energy, trace_lambdas
        return theta, n_iter