import math
import time
from random import randint

import numpy as np
import scipy.stats as SPST


def upaintbox_sample(log_res, hold, Y, held_out, ext, sig, sig_w, iterate, K, data_run):
    print("Trial Number: " + str(data_run))
    N, T = Y.shape
    res = 1
    tree = gen_tree(K, res)
    ctree, ptree = tree
    Z = draw_Z_tree(tree, N)
    #W = sample_W(Y,Z,sig,sig_w)
    W = np.reshape(np.random.normal(0, sig_w, K * T), (K, T))
    ll_list = []
    iter_time = []
    f_count = []
    lapse_data = []
    pred_ll = []
    pred = 0
    for it in range(iterate):
        # double the paintbox resolution every `hold` iterations, up to 2**log_res
        if it % hold == 0 and res < 2**log_res:
            res = res * 2
        start = time.time()
        N, K = Z.shape
        # sample Z
        Z, prob_matrix = sample_Z(Y, Z, W, sig, sig_w, tree)
        # if it % 10 == 0:
        #     print("iteration: " + str(it))
        #     print("Sparsity: " + str(np.sum(Z, axis=0)))
        #     print('predictive log likelihood: ' + str(pred))
        # sample paintbox
        tree, lapse = sample_pb(Z, tree, res)
        # sample W
        W = sample_W(Y, Z, sig, sig_w)
        # track data log likelihood and feature count
        ll_list.append(log_data_zw(Y, Z, W, sig))
        F, D = get_FD(tree)
        f_count.append(F)
        # predictive log likelihood
        # if it % 500 == 0:
        #     pred = pred_ll_paintbox(held_out, W, tree, sig)
        #     pred_ll.append(pred)
        # if it % 1000 == 0 and it > 0:
        #     display_W(W, 3, 3, 3, 3, 'four')
        # add new features; drop flags the last-iteration edge case
        drop = 0
        if it == iterate - 1:
            drop = 1
        Z, W, tree = new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w, drop)
        end = time.time()
        iter_time.append(end - start)
        lapse_data.append(lapse)
    iter_time = np.cumsum(iter_time)
    return (ll_list, iter_time, f_count, lapse_data, Z, W, prob_matrix, pred_ll, tree)

def recover_IBP(held, observe, Z, W, sig):
    # Recovery log likelihood log p(full row | observed half) for each
    # held-out row, marginalizing over all 2**K feature assignments under
    # the empirical Z posterior.
    N, half = observe.shape
    R, T = held.shape
    N, K = Z.shape
    log_recover = 0
    for i in range(R):
        full_ll = 0
        observe_ll = 0
        for j in range(2**K):
            binary = list(map(int, "{0:b}".format(j)))
            pad_binary = [0] * (K - len(binary)) + binary
            log_z_post = Z_posterior(pad_binary, Z)
            total_z = np.array(pad_binary)
            full_ll = full_ll + np.exp(
                log_data_zw(held[i, :], total_z, W, sig) + log_z_post)
            observe_ll = observe_ll + np.exp(
                log_data_zw(observe[i, :], total_z, W[:, :half], sig) + log_z_post)
        log_recover = log_recover + np.log(full_ll) - np.log(observe_ll)
    return log_recover
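
# NOTE (sketch): recover_IBP above, and the paintbox variants below, accumulate
# np.exp(log_likelihood + log_prior) directly, which can underflow to zero once
# K grows. The helper below is an illustration of a numerically stabler
# accumulation using scipy.special.logsumexp; it is not part of the original
# sampler, and log_z_post_fn is a hypothetical callable standing in for
# Z_posterior / Z_paintbox.
def _log_marginal_row_sketch(row, W, sig, log_z_post_fn, K):
    from scipy.special import logsumexp
    log_terms = []
    for j in range(2**K):
        binary = list(map(int, "{0:b}".format(j)))
        z = np.array([0] * (K - len(binary)) + binary)
        # combine per-assignment log joints in log space
        log_terms.append(log_data_zw(row, z, W, sig) + log_z_post_fn(z))
    return logsumexp(log_terms)  # log sum_z p(row | z, W) p(z)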

def add_feature(i, Y, Z, W, tree, vec, prior, sig, sig_w, res):
    # Propose a brand-new feature possessed only by row i, and accept it with
    # probability proportional to the prior-weighted likelihood.
    # NOTE: `res` (the paintbox resolution) was referenced but never defined
    # in the original; it is assumed to be passed in so add(tree, res) works.
    N, T = Y.shape
    N, K = Z.shape
    old = log_data_zw(Y, Z, W, sig)
    col = np.zeros((N, 1))
    col[i, 0] = 1
    Z_new = np.hstack((Z, col))
    W_new = np.vstack((W, np.random.normal(0, sig_w, (1, T))))
    #W_new = sample_W(Y,Z_new,sig,sig_w)
    new = log_data_zw(Y, Z_new, W_new, sig)
    # shift both log likelihoods by `old` to avoid overflow in exp()
    new = new - old
    old = 0
    roulette = [np.exp(old) * prior[0], np.exp(new) * prior[1]]
    normal_roulette = [float(r) / np.sum(roulette) for r in roulette]
    chosen = int(np.where(np.random.multinomial(1, normal_roulette) == 1)[0])
    if chosen:
        Z = Z_new
        W = W_new
        tree = add(tree, res)
        vec = get_vec(tree)
    return (Z, W, tree, vec)
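
# NOTE (sketch): the accept step in add_feature is a two-way Gibbs "roulette":
# exponentiate the relative log likelihoods, weight by the prior over adding a
# feature, normalize, and draw one index. A self-contained illustration with
# made-up numbers (not part of the original code):
def _roulette_sketch():
    log_scores = [0.0, -1.3]   # [keep K features, add a (K+1)-th], shifted so the max is 0
    prior = [0.9, 0.1]         # hypothetical prior mass on the two moves
    roulette = [np.exp(s) * p for s, p in zip(log_scores, prior)]
    normal_roulette = [r / np.sum(roulette) for r in roulette]
    # one multinomial draw over the two normalized weights
    return int(np.argmax(np.random.multinomial(1, normal_roulette)))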

def recover_paintbox(held, observe, W, tree, sig):
    # Paintbox analogue of recover_IBP: log p(full row | observed half),
    # marginalizing over all 2**K assignments under the paintbox prior.
    N, half = observe.shape
    R, T = held.shape
    K, T = W.shape
    log_recover = 0
    vec = get_vec(tree)
    for i in range(R):
        full_ll = 0
        observe_ll = 0
        for j in range(2**K):
            binary = list(map(int, "{0:b}".format(j)))
            pad_binary = [0] * (K - len(binary)) + binary
            log_z_post = np.log(Z_paintbox(pad_binary, vec))
            total_z = np.array(pad_binary)
            full_ll = full_ll + np.exp(
                log_data_zw(held[i, :], total_z, W, sig) + log_z_post)
            observe_ll = observe_ll + np.exp(
                log_data_zw(observe[i, :], total_z, W[:, :half], sig) + log_z_post)
        log_recover = log_recover + np.log(full_ll) - np.log(observe_ll)
    return log_recover

def sample_recover(held_out, observe, Z, A, sig, obs_indices):
    # Estimate log p(hidden dims | observed dims) for held-out rows by
    # Gibbs-sampling their feature assignments given only the observed
    # dimensions, then scoring the final sampled assignments.
    _, obs = observe.shape
    R, T = held_out.shape
    K, _ = A.shape
    N, _ = Z.shape
    z_prob = 1. / (N + 1) * np.sum(Z, axis=0)
    log_recover = 0
    Z_total = np.vstack((Z, np.zeros((R, K))))
    indices = [i for i in range(T)]
    hidden = [x for x in indices if x not in obs_indices]
    iterate = 50
    for it in range(iterate):
        for n in range(N, N + R):
            for k in range(K):
                # Gibbs-sample each instantiated entry of the held-out rows of Z
                IBP_one = float(Z_total[:, k].sum() - Z_total[n, k]) / (N + R - 1)
                IBP_zero = 1 - IBP_one
                Z_one = np.copy(Z_total)
                Z_zero = np.copy(Z_total)
                Z_one[n, k] = 1
                Z_zero[n, k] = 0
                like_one = ullikelihood(observe, Z_one[N:N + R, :], A[:, obs_indices], sig)
                like_zero = ullikelihood(observe, Z_zero[N:N + R, :], A[:, obs_indices], sig)
                # shift for numerical stability before exponentiating
                shift = max([like_one, like_zero])
                like_one = like_one - shift
                like_zero = like_zero - shift
                update_probability = float(IBP_one * np.exp(like_one)) / (
                    IBP_one * np.exp(like_one) + IBP_zero * np.exp(like_zero))
                if math.isnan(update_probability):
                    update_probability = 0
                Z_total[n, k] = np.random.binomial(1, update_probability)
    # score the final sampled assignments and the hidden dimensions
    for r in range(R):
        log_recover += Z_posterior(Z_total[N + r, :], z_prob)
    log_recover += log_data_zw(held_out[:, hidden], Z_total[N:N + R, :], A[:, hidden], sig)
    return log_recover

def cgibbs_sample(Y, sig, sig_w, iterate, D, F, N, T, res):
    # Collapsed Gibbs sampler over a fixed-size paintbox.
    # NOTE: `res` was referenced but never defined in the original; it is
    # assumed to be passed in so sample_pb(...) works.
    pb = pb_init(D, F)
    Z = draw_Z(pb, D, F, N, T)
    ll_list = []
    for it in range(iterate):
        # sample Z
        Z = sample_Z(Y, Z, sig, sig_w, pb, D, F, N, T)
        # sample paintbox
        pb = sample_pb(Z, pb, D, F, N, T, res)
        W = mean_w(Y, Z)
        vec = vectorize(pb)
        ll_list.append(log_data_zw(Y, Z, W, sig) + Z_vec(Z, vec, D) + log_w_sig(W, sig))
    return (ll_list, Z, W, pb)

def pred_ll_paintbox(held, W, tree, sig):
    # Predictive log likelihood of held-out rows under the paintbox model,
    # marginalizing over all 2**K feature assignments.
    # TODO: decide whether predictive log likelihood is the right comparison.
    R, T = held.shape
    K, T = W.shape
    log_pred = 0
    vec = get_vec(tree)
    for i in range(R):
        pred_row = 0
        for j in range(2**K):
            binary = list(map(int, "{0:b}".format(j)))
            pad_binary = [0] * (K - len(binary)) + binary
            log_z_post = np.log(Z_paintbox(pad_binary, vec))
            total_z = np.array(pad_binary)
            pred_row = pred_row + np.exp(
                log_data_zw(held[i, :], total_z, W, sig) + log_z_post)
        log_pred = log_pred + np.log(pred_row)
    return log_pred

def pred_ll_IBP(held, Z, W, sig):
    # Predictive log likelihood of held-out rows under the IBP model,
    # marginalizing over all 2**K feature assignments.
    # TODO: decide whether predictive log likelihood is the right comparison.
    R, T = held.shape
    N, K = Z.shape
    log_pred = 0
    for i in range(R):
        pred_row = 0
        for j in range(2**K):
            binary = list(map(int, "{0:b}".format(j)))
            pad_binary = [0] * (K - len(binary)) + binary
            log_z_post = Z_posterior(pad_binary, Z)
            total_z = np.array(pad_binary)
            pred_row = pred_row + np.exp(
                log_data_zw(held[i, :], total_z, W, sig) + log_z_post)
        log_pred = log_pred + np.log(pred_row)
    return log_pred
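
# NOTE (sketch): pred_ll_paintbox and pred_ll_IBP enumerate binary feature
# vectors by formatting j in base 2 and left-padding with zeros. An equivalent,
# arguably clearer enumeration via itertools.product; illustrative only, not a
# drop-in replacement for the originals:
def _enumerate_z_sketch(K):
    import itertools
    for bits in itertools.product([0, 1], repeat=K):
        # visits the same 2**K vectors as the "{0:b}".format(j) padding,
        # most significant bit first: (0,...,0,0), (0,...,0,1), ...
        yield np.array(bits)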

def sample_recover(held, observe, W, tree, sig, obs_indices):
    # Paintbox analogue of the IBP sample_recover: Gibbs-sample feature
    # assignments for the held-out rows given only the observed dimensions,
    # then score the hidden dimensions under the final sample.
    N, obs = observe.shape
    R, T = held.shape
    K, T = W.shape
    log_recover = 0
    vec = get_vec(tree)
    W_obs = W[:, obs_indices]
    # initialize Z from the paintbox prior
    Z = draw_Z_tree(tree, R)
    indices = [i for i in range(T)]
    hidden = [x for x in indices if x not in obs_indices]
    iterate = 100
    for it in range(iterate):
        N, K = Z.shape
        # sample Z given only the observed dimensions
        Z, prob_matrix = sample_Z(held[:, obs_indices], Z, W_obs, sig, tree)
    for row in Z:
        log_recover += np.log(Z_paintbox(row, vec))
    log_recover += log_data_zw(held[:, hidden], Z, W[:, hidden], sig)
    return log_recover

def upaintbox_sample(log_res, hold, Y, held_out, ext, sig, sig_w, iterate, K,
                     truncate, obs_indices, limit, Z_init=[], W_init=[],
                     data_dim=[3, 3, 2, 2], init=False, display=False):
    small_x, small_y, big_x, big_y = data_dim
    N, T = Y.shape
    # NOTE: generating the tree with res = 1 is technically a bug, but the
    # tree paintbox hard-codes 0.5, which compensates.
    res = 1
    tree = gen_tree(K, res)
    ctree, ptree = tree
    if init:
        Z = draw_Z_tree(tree, N)
        #Z = Z_init
        W = W_init
    else:
        Z = draw_Z_tree(tree, N)
        W = np.reshape(np.random.normal(0, sig_w, K * T), (K, T))
        #Z = np.loadtxt('assignments.txt')
        #W = sample_W(Y,Z,sig,sig_w)
        #W = np.loadtxt('features.txt')
    # full = generate_gg_blocks()
    # W = np.zeros((3, T))
    # W[0, :] = full[0, :]
    # W[1, :] = full[2, :]
    # W[2, :] = full[0, :] + full[2, :]
    # display_W(W, 'four')
    ll_list = []
    iter_time = []
    f_count = []
    lapse_data = []
    pred_ll = []
    pred = 0
    rec_ll = []
    rec = 0
    observe = held_out[:, obs_indices]
    for redo in range(1):
        if redo == 1:
            # restart branch; unreachable while range(1) is used
            res = 1
            N, K = Z.shape
            tree = gen_tree(K, res)
            ctree, ptree = tree
        for it in range(iterate):
            if it == 0:
                start = time.time()
            if it > 0 and np.sum(iter_time) > limit:
                # time budget exhausted
                break
            # double the paintbox resolution every `hold` iterations
            if it % hold == 0 and res < 2**log_res:
                res = res * 2
            start = time.time()
            N, K = Z.shape
            # sample Z
            Z, prob_matrix = sample_Z(Y, Z, W, sig, tree)
            if it % 10 == 0 and display:
                print("iteration: " + str(it))
                print("Sparsity: " + str(np.sum(Z, axis=0)))
                #print('predictive log likelihood: ' + str(pred))
                print('recover log likelihood: ' + str(rec))
            # sample paintbox
            tree, lapse = sample_pb(Z, tree, res)
            # sample W
            W = sample_W(Y, Z, sig, sig_w)
            ll_list.append(log_data_zw(Y, Z, W, sig))
            F, D = get_FD(tree)
            f_count.append(F)
            # recovered log likelihood
            if it % 50 == 49 and it > 0:
                #pred = pred_ll_paintbox(held_out, W, tree, sig)
                pred = 0
                pred_ll.append(pred)
                #rec = sample_recover(held_out,observe,W,tree,sig,obs_indices)
                rec = recover_paintbox(held_out, observe, W, tree, sig, obs_indices)
                rec_ll.append(rec)
            end = time.time()
            iter_time.append(end - start)
            start = time.time()
            # Auxiliary printouts
            #if it % 500 == 0 and it > 0:
            #    print_paintbox(tree, W, data_dim, 'four')
            #if it % 200 == 0 and it > 0:
            #    display_W(W, data_dim, 'nine')
            # add new features; drop flags the last-iteration edge case
            drop = 0
            if it == iterate - 1:
                drop = 1
            Z, W, tree = new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w, drop, truncate)
            lapse_data.append(lapse)
    iter_time = np.cumsum(iter_time)
    return (ll_list, iter_time, f_count, lapse_data, Z, W, prob_matrix,
            pred_ll, rec_ll, tree)

def recover_paintbox(held, observe, W, tree, sig, obs_indices):
    # Recovery log likelihood log p(hidden dims | observed dims) under the
    # paintbox, approximating each 2**K-term log-sum by its largest term and
    # tracking upper/lower bounds via the runner-up term.
    N, obs = observe.shape
    R, T = held.shape
    K, T = W.shape
    log_recover = 0
    upper_bound = 0
    lower_bound = 0
    vec = get_vec(tree)
    for i in range(R):
        f_max = 0      # largest full-data log joint seen so far
        o_max = 0      # largest observed-data log joint seen so far
        f_error = 0    # displaced full-data maximum (runner-up)
        o_error = 0    # displaced observed-data maximum (runner-up)
        numu = 0
        numl = 0
        denu = 0
        denl = 0
        valid = True
        for j in range(2**K):
            binary = list(map(int, "{0:b}".format(j)))
            pad_binary = [0] * (K - len(binary)) + binary
            log_z_post = np.log(Z_paintbox(pad_binary, vec))
            if math.isinf(log_z_post):
                continue  # zero-probability assignment
            total_z = np.array(pad_binary)
            fll = log_data_zw(held[i, :], total_z, W, sig) + log_z_post
            oll = log_data_zw(observe[i, :], total_z, W[:, obs_indices], sig) + log_z_post
            if valid:
                f_max = fll
                o_max = oll
                valid = False
            else:
                if fll > f_max:
                    f_error = f_max
                    f_max = fll
                if oll > o_max:
                    o_error = o_max
                    o_max = oll
        log_recover = log_recover + f_max - o_max
        # bound the remaining (2**K - 1) terms by the runner-up; the
        # correction is negligible once the gap exceeds 10 nats
        if f_error == 0:
            numu = numu + f_max
        elif f_max - f_error > 10:
            numu = numu + f_max
        else:
            numu = numu + np.log(np.exp(f_max - f_error) + (2**K - 1)) + f_error
        numl = numl + f_max
        if o_error == 0:
            denu = denu + o_max
        elif o_max - o_error > 10:
            denu = denu + o_max
        else:
            denu = denu + np.log(np.exp(o_max - o_error) + (2**K - 1)) + o_error
        denl = denl + o_max
        upper_bound = upper_bound + numu - denl
        lower_bound = lower_bound + numl - denu
    return log_recover
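
# NOTE (sketch): the numu/denu bookkeeping above (and in recover_IBP below)
# brackets a log-sum of 2**K terms by its largest term m1 and runner-up m2:
#     exp(m1) <= sum <= exp(m1) + (2**K - 1) * exp(m2),
# i.e. m1 <= log(sum) <= m2 + log(exp(m1 - m2) + 2**K - 1);
# when m1 - m2 > 10 the correction is negligible, hence the cutoff.
# A quick numeric check of the inequality (illustrative only):
def _bound_check_sketch(K=4):
    log_terms = np.sort(np.random.normal(-10, 3, 2**K))[::-1]
    m1, m2 = log_terms[0], log_terms[1]
    log_sum = np.log(np.sum(np.exp(log_terms)))
    lower = m1
    upper = m2 + np.log(np.exp(m1 - m2) + (2**K - 1))
    assert lower <= log_sum <= upper
    return lower, log_sum, upper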

def ugibbs_sampler(data_set, held_out, alpha, sigma_n, sigma_a, iter_count,
                   select, trunc, data_dim, obs_indices):
    # Uncollapsed Gibbs sampler for the linear-Gaussian IBP model.
    data_count = data_set.shape[0]
    X = data_set
    N = data_count
    K_max = 5
    dim_count = data_set.shape[1]
    ll_set = np.zeros([iter_count])
    lp_set = np.zeros([iter_count])
    # Initialize Z randomly (explore how different initializations matter)
    Z = np.random.binomial(1, 0.25, [N, 1])
    active_K = 1
    pred_ll = []
    pred_prob = 0
    rec = 0
    # MCMC loop
    for mcmc_iter in range(iter_count):
        # Sample A given Z
        A = resample_A(data_set, Z, sigma_a, sigma_n)
        for n in range(data_count):
            for k in range(active_K):
                # Gibbs-sample each instantiated entry of Z
                try:
                    IBP_one = float(Z[:, k].sum() - Z[n, k]) / (N - 1)
                except IndexError:
                    print('Index Error')
                IBP_zero = 1 - IBP_one
                Z_one = np.copy(Z)
                Z_zero = np.copy(Z)
                Z_one[n, k] = 1
                Z_zero[n, k] = 0
                like_one = ullikelihood(data_set, Z_one, A, sigma_n)
                like_zero = ullikelihood(data_set, Z_zero, A, sigma_n)
                # shift for numerical stability before exponentiating
                shift = max([like_one, like_zero])
                like_one = like_one - shift
                like_zero = like_zero - shift
                update_probability = float(IBP_one * np.exp(like_one)) / (
                    IBP_one * np.exp(like_one) + IBP_zero * np.exp(like_zero))
                if math.isnan(update_probability):
                    update_probability = 0
                try:
                    Z[n, k] = SPST.bernoulli.rvs(update_probability)
                except ValueError:
                    print('ValueError')
        # Drop unused features and the corresponding rows of A
        Z_sum = np.array(Z.sum(axis=0))
        nonzero = list()
        for j in range(Z_sum.shape[0]):
            if Z_sum[j] != 0:
                nonzero.append(j)
        Z = Z[:, nonzero]
        A = A[nonzero, :]
        active_K = Z.shape[1]
        # Propose a fresh feature while below the truncation level
        if active_K < trunc:
            Z_new = np.random.binomial(1, 0.25, [N, 1])
            #Z_new = np.zeros((N,1))
            mean = np.zeros(dim_count)
            cov = sigma_a * np.eye(dim_count)
            A_new = SPST.multivariate_normal.rvs(mean, cov)
            A = np.vstack((A, A_new))
            Z = np.hstack((Z, Z_new))
            active_K = Z.shape[1]
        # Compute likelihood and prior
        ll_set[mcmc_iter] = log_data_zw(data_set, Z, A, sigma_n)
        # if mcmc_iter % 10 == 0 and mcmc_iter > 0:
        #     Z_trunc, A_trunc = truncate(Z, A, select)
        #     pred_prob = pred_ll_IBP(held_out, Z_trunc, A_trunc, sigma_n)
        #     pred_ll.append(pred_prob)
        #     rec = recover_IBP(held_out, held_out[:, :dim_count // 2], Z, A, sigma_n, obs_indices)
    return Z, A, ll_set, pred_ll

def ugibbs_sampler(data_set, held_out, alpha, sigma_n, sigma_a, iter_count,
                   select, trunc, observe, obs_indices, limit,
                   data_dim=[3, 3, 2, 2], init=False, display=False):
    # Time-limited variant of the uncollapsed IBP Gibbs sampler that also
    # tracks the recovery log likelihood on held-out data.
    data_count = data_set.shape[0]
    X = data_set
    N = data_count
    K_max = 5
    dim_count = data_set.shape[1]
    ll_set = np.zeros([iter_count])
    lp_set = np.zeros([iter_count])
    iter_time = []
    # Initialize Z with a single feature owned by one random row
    #Z = np.random.binomial(1,0.25,[N,1])
    Z = np.zeros((N, 1))
    Z[randint(0, N - 1), 0] = 1
    active_K = 1
    pred_ll = []
    pred_prob = 0
    rec_ll = []
    rec = 0
    # MCMC loop
    for mcmc_iter in range(iter_count):
        if mcmc_iter == 0:
            start = time.time()
        if mcmc_iter > 0 and np.sum(iter_time) > limit:
            # time budget exhausted
            break
        # Sample A given Z
        start = time.time()
        A = resample_A(data_set, Z, sigma_a, sigma_n)
        for n in range(data_count):
            for k in range(active_K):
                # Gibbs-sample each instantiated entry of Z
                try:
                    IBP_one = float(Z[:, k].sum() - Z[n, k]) / (N - 1)
                except IndexError:
                    pass
                IBP_zero = 1 - IBP_one
                Z_one = np.copy(Z)
                Z_zero = np.copy(Z)
                Z_one[n, k] = 1
                Z_zero[n, k] = 0
                like_one = ullikelihood(data_set, Z_one, A, sigma_n)
                like_zero = ullikelihood(data_set, Z_zero, A, sigma_n)
                # shift for numerical stability before exponentiating
                shift = max([like_one, like_zero])
                like_one = like_one - shift
                like_zero = like_zero - shift
                update_probability = float(IBP_one * np.exp(like_one)) / (
                    IBP_one * np.exp(like_one) + IBP_zero * np.exp(like_zero))
                if math.isnan(update_probability):
                    update_probability = 0
                try:
                    Z[n, k] = np.random.binomial(1, update_probability)
                except ValueError:
                    pass
        # Drop unused features and the corresponding rows of A
        # (to quick-return, indent the block below)
        Z_sum = np.array(Z.sum(axis=0))
        nonzero = list()
        for j in range(Z_sum.shape[0]):
            if Z_sum[j] != 0:
                nonzero.append(j)
        Z = Z[:, nonzero]
        A = A[nonzero, :]
        active_K = Z.shape[1]
        # Propose a fresh feature while below the truncation level
        if active_K < trunc:
            #Z_new = np.random.binomial(1,0.005,[N,1])
            Z_new = np.zeros((N, 1))
            Z_new[randint(0, N - 1)] = 1
            mean = np.zeros(dim_count)
            cov = sigma_a * np.eye(dim_count)
            A_new = np.random.multivariate_normal(mean, cov)
            A = np.vstack((A, A_new))
            Z = np.hstack((Z, Z_new))
            active_K = Z.shape[1]
        if mcmc_iter % 1 == 0 and display:
            print("iteration: " + str(mcmc_iter))
            print("Sparsity: " + str(np.sum(Z, axis=0)))
            print('predictive log likelihood: ' + str(pred_prob))
            print('recovery log likelihood: ' + str(rec))
            print("active K: " + str(active_K))
        # Compute likelihood and prior
        # beware of the init variable -- logically unsound as a gate here
        if mcmc_iter % 2 == 0 and mcmc_iter > 0 and init:
            pred_prob = 0
            pred_ll.append(pred_prob)
            rec = recover_IBP(held_out, observe, Z, A, sigma_n, obs_indices)
            #rec = sample_recover(held_out,observe,Z,A,sigma_n,obs_indices)
            rec_ll.append(rec)
        end = time.time()
        iter_time.append(end - start)
        start = time.time()
        ll_set[mcmc_iter] = log_data_zw(data_set, Z, A, sigma_n)
    iter_time = np.cumsum(iter_time)
    return Z, A, ll_set, pred_ll, rec_ll, iter_time
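
# NOTE (sketch): a minimal driver for the time-limited ugibbs_sampler above.
# Every shape and hyperparameter below is a made-up assumption for
# illustration, not a setting from the original experiments.
def _ugibbs_smoke_test():
    Y = np.random.normal(0, 1, (50, 36))          # 50 observations, 36 dims
    held_out = np.random.normal(0, 1, (40, 36))   # 40 held-out rows
    obs_indices = list(range(18))                 # first half of dims observed
    observe = held_out[:, obs_indices]
    return ugibbs_sampler(Y, held_out, alpha=1.0, sigma_n=0.5, sigma_a=1.0,
                          iter_count=200, select=None, trunc=6,
                          observe=observe, obs_indices=obs_indices,
                          limit=60.0, init=True, display=False)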

def recover_IBP(held, observe, Z, W, sig, obs_indices):
    # IBP analogue of recover_paintbox: log p(hidden dims | observed dims),
    # approximating each 2**K-term log-sum by its largest term and tracking
    # upper/lower bounds via the runner-up term.
    _, obs = observe.shape
    R, T = held.shape
    K, T = W.shape
    N, T = Z.shape
    z_prob = 1. / (N + 1) * np.sum(Z, axis=0)
    log_recover = 0
    upper_bound = 0
    lower_bound = 0
    for i in range(R):
        f_max = 0      # largest full-data log joint seen so far
        o_max = 0      # largest observed-data log joint seen so far
        f_error = 0    # displaced full-data maximum (runner-up)
        o_error = 0    # displaced observed-data maximum (runner-up)
        numu = 0
        numl = 0
        denu = 0
        denl = 0
        valid = True
        for j in range(2**K):
            binary = list(map(int, "{0:b}".format(j)))
            pad_binary = [0] * (K - len(binary)) + binary
            log_z_post = Z_posterior(pad_binary, z_prob)
            if math.isinf(log_z_post):
                continue  # zero-probability assignment
            total_z = np.array(pad_binary)
            fll = log_data_zw(held[i, :], total_z, W, sig) + log_z_post
            oll = log_data_zw(observe[i, :], total_z, W[:, obs_indices], sig) + log_z_post
            if valid:
                f_max = fll
                o_max = oll
                valid = False
            else:
                if fll > f_max:
                    f_error = f_max
                    f_max = fll
                if oll > o_max:
                    o_error = o_max
                    o_max = oll
        log_recover = log_recover + f_max - o_max
        # bound the remaining (2**K - 1) terms by the runner-up; the
        # correction is negligible once the gap exceeds 10 nats
        if f_error == 0:
            numu = numu + f_max
        elif f_max - f_error > 10:
            numu = numu + f_max
        else:
            numu = numu + np.log(np.exp(f_max - f_error) + (2**K - 1)) + f_error
        numl = numl + f_max
        if o_error == 0:
            denu = denu + o_max
        elif o_max - o_error > 10:
            denu = denu + o_max
        else:
            denu = denu + np.log(np.exp(o_max - o_error) + (2**K - 1)) + o_error
        denl = denl + o_max
        upper_bound = upper_bound + numu - denl
        lower_bound = lower_bound + numl - denu
    return log_recover