def upaintbox_sample(log_res, hold, Y, held_out, ext, sig, sig_w, iterate, K, data_run):
    """Run ``iterate`` sampler sweeps over Z, the paintbox tree and W.

    Every sweep resamples Z (``sample_Z``), the tree (``sample_pb``) and W
    (``sample_W``), records the data log-likelihood and the feature count,
    and finally calls ``new_feature``.

    Args:
        log_res: ``res`` starts at 1 and is doubled while ``res < 2**log_res``.
        hold: ``res`` is only doubled on sweeps where ``it % hold == 0``.
        Y: 2-D array; its shape is unpacked as ``(N, T)``.
        held_out: not read by this version (the predictive log-likelihood
            code that used it is disabled).
        ext: forwarded to ``new_feature``.
        sig: forwarded to ``sample_Z``, ``sample_W``, ``log_data_zw`` and
            ``new_feature``.
        sig_w: scale of the normal draw that initialises W; also forwarded
            to the sampling helpers.
        iterate: number of sweeps.
        K: initial feature count passed to ``gen_tree``; rebound to
            ``Z.shape[1]`` at the start of every sweep.
        data_run: trial number, only printed.

    Returns:
        ``(ll_list, iter_time, f_count, lapse_data, Z, W, prob_matrix,
        pred_ll, tree)``. ``iter_time`` is the cumulative sum of per-sweep
        wall-clock durations, ``pred_ll`` is always empty in this version,
        and ``prob_matrix`` is ``None`` when ``iterate == 0``.
    """
    print("Trial Number: " + str(data_run))
    N, T = Y.shape
    res = 1
    tree = gen_tree(K, res)
    Z = draw_Z_tree(tree, N)
    W = np.reshape(np.random.normal(0, sig_w, K * T), (K, T))

    ll_list = []
    iter_time = []
    f_count = []
    lapse_data = []
    pred_ll = []
    # Bound up front so the return statement is valid even when the loop
    # body never runs (iterate == 0); previously that raised
    # UnboundLocalError.
    prob_matrix = None

    for it in range(iterate):
        if it % hold == 0 and res < 2**log_res:
            res = res * 2
        start = time.time()
        _, K = Z.shape

        # sample Z
        Z, prob_matrix = sample_Z(Y, Z, W, sig, sig_w, tree)
        # sample paintbox
        tree, lapse = sample_pb(Z, tree, res)
        # sample W
        W = sample_W(Y, Z, sig, sig_w)

        ll_list.append(log_data_zw(Y, Z, W, sig))
        F, _ = get_FD(tree)
        f_count.append(F)

        # On the last sweep new_feature is told to drop only (drop=1).
        drop = 1 if it == iterate - 1 else 0
        Z, W, tree = new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w, drop)

        iter_time.append(time.time() - start)
        lapse_data.append(lapse)

    iter_time = np.cumsum(iter_time)
    return (ll_list, iter_time, f_count, lapse_data, Z, W, prob_matrix,
            pred_ll, tree)
def new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w):
    """Drop features, then append new ones unless more than 8 remain.

    Args:
        Y: not read by this function.
        Z, W, tree: current state; passed through ``drop_feature`` first.
        ext: number of features to add when ``F + ext >= K``.
        K: when ``F + ext < K``, ``K - F`` features are added instead.
        res: forwarded to ``add`` and ``draw_feature``.
        sig: not read by this function.
        sig_w: scale of the normal draw for the new rows of W.

    Returns:
        ``(Z, W, tree)`` after dropping and, when applicable, extending.
    """
    Z, W, tree = drop_feature(Z, W, tree)
    F, _ = get_FD(tree)
    if F > 8:
        return (Z, W, tree)

    more = K - F if F + ext < K else ext
    tree = add(tree, more, res)
    Z = draw_feature(Z, tree, res, more)
    # The original read an undefined name ``T`` here. np.vstack needs the new
    # rows to have as many columns as W, so take the width from W itself.
    width = np.shape(W)[1]
    W = np.vstack((W, np.random.normal(0, sig_w, (more, width))))
    return (Z, W, tree)
def print_paintbox(tree,W,data_dim,flag='four'):
    """Walk every paintbox index and reconstruct the likely ones.

    For each index ``j`` in ``range(D)`` the binary digits of ``j`` are
    left-padded with zeros to length ``int(log2(D))`` and scored with
    ``Z_paintbox``. Patterns scoring above 0.01 are multiplied into ``W``.
    All display/print calls are currently disabled, so nothing is emitted.

    Args:
        tree: passed to ``get_vec`` and ``get_FD``.
        W: right-hand operand of the ``np.dot`` reconstruction.
        data_dim: must unpack into exactly four values.
        flag: not read while the display call is disabled.

    Returns:
        Always 0.
    """
    small_x, small_y, big_x, big_y = data_dim
    vec = get_vec(tree)
    F, D = get_FD(tree)
    n_bits = int(math.log(D, 2))
    for index in range(D):
        # zfill pads on the left and never truncates, like the list padding
        padded = [int(ch) for ch in "{0:b}".format(index).zfill(n_bits)]
        prob = Z_paintbox(padded, vec)
        if not prob > 0.01:
            continue
        # Result is only consumed by the (disabled) display call.
        np.dot(np.array(padded), W)
    return 0
def new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w, drop):
    """Drop features, then append new ones unless capped or ``drop`` is set.

    Args:
        Y: not read by this function.
        Z, W, tree: current state; passed through ``drop_feature`` first.
        ext: number of features to add when ``F + ext >= K``.
        K: when ``F + ext < K``, ``K - F`` features are added instead.
        res: forwarded to ``add`` and ``draw_feature``.
        sig: not read by this function.
        sig_w: scale of the normal draw for the new rows of W.
        drop: when truthy, only the drop step is performed.

    Returns:
        ``(Z, W, tree)`` after dropping and, when applicable, extending.
    """
    # Marked "debugging invariant" by the original author; the result is
    # unused. NOTE(review): presumably get_vec validates the tree -- confirm
    # before removing this call.
    get_vec(tree)

    Z, W, tree = drop_feature(Z, W, tree)
    F, _ = get_FD(tree)
    if F >= 12 or drop:
        return (Z, W, tree)

    more = K - F if F + ext < K else ext
    tree = add(tree, more, res)
    Z = draw_feature(Z, tree, res, more)
    # The original read an undefined name ``T`` here. np.vstack needs the new
    # rows to have as many columns as W, so take the width from W itself.
    width = np.shape(W)[1]
    W = np.vstack((W, np.random.normal(0, sig_w, (more, width))))
    return (Z, W, tree)
def draw_Z_tree(tree, N):
    """Draw an ``(N, F)`` matrix whose rows are binary patterns of indices.

    The vector from ``get_vec(tree)`` is normalised and used as multinomial
    weights to split ``N`` rows among the ``D`` indices. Index ``i`` then
    fills its share of consecutive rows with the binary digits of ``i``,
    left-padded with zeros to length ``F``. Rows are not shuffled.

    Args:
        tree: two-element structure accepted by ``get_FD`` and ``get_vec``.
        N: number of rows to draw.

    Returns:
        ``numpy`` array of shape ``(N, F)``.
    """
    F, D = get_FD(tree)
    vec = get_vec(tree)
    counts = np.random.multinomial(N, 1. / np.sum(vec) * vec)
    _ctree, _ptree = tree  # kept: fails early if tree is not a pair
    Z = np.zeros((N, F))
    stops = np.cumsum(counts)
    for idx in range(D):
        digits = [int(ch) for ch in "{0:b}".format(idx)]
        pattern = [0] * (F - len(digits)) + digits
        first = 0 if idx == 0 else stops[idx - 1]
        Z[first:stops[idx], :] = np.tile(pattern, (counts[idx], 1))
    return Z
def ugibbs_sample(Y, ext, sig, sig_w, iterate, K, data_run):
    """Run ``iterate`` sweeps of the Z / paintbox / W sampler.

    Each sweep resamples Z, the tree and W, records the data
    log-likelihood and the feature count, then calls ``new_feature``.

    Args:
        Y: 2-D array; its shape is unpacked as ``(N, T)``.
        ext: forwarded to ``new_feature``.
        sig: forwarded to the sampling and likelihood helpers.
        sig_w: scale of the normal draw that initialises W; also forwarded.
        iterate: number of sweeps.
        K: initial feature count; rebound to ``Z.shape[1]`` every sweep.
        data_run: trial number, only printed.

    Returns:
        ``(ll_list, iter_time, f_count, Z, W)`` where ``iter_time`` is the
        cumulative sum of per-sweep wall-clock durations.
    """
    print("Trial Number: " + str(data_run))
    N, T = Y.shape
    # NOTE(review): ``res`` is never assigned inside this function, so it is
    # looked up as a global -- confirm a module-level ``res`` exists.
    tree = gen_tree(K, res)
    ctree, ptree = tree
    Z = draw_Z_tree(tree, N)
    W = np.random.normal(0, sig_w, K * T).reshape((K, T))

    loglik_trace = []
    sweep_seconds = []
    feature_trace = []
    for sweep in range(iterate):
        tic = time.time()
        N, K = Z.shape

        # sample Z
        Z = sample_Z(Y, Z, W, sig, sig_w, tree)
        if sweep % 10 == 0:
            print("iteration: " + str(sweep))
            print("Sparsity: " + str(np.sum(Z, axis=0)))

        # sample paintbox, then W
        tree = sample_pb(Z, tree, res)
        W = sample_W(Y, Z, sig, sig_w)

        get_vec(tree)  # result unused; call kept from the original
        loglik_trace.append(log_data_zw(Y, Z, W, sig))
        n_features, _ = get_FD(tree)
        feature_trace.append(n_features)

        # add new features
        Z, W, tree = new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w)
        sweep_seconds.append(time.time() - tic)

    return (loglik_trace, np.cumsum(sweep_seconds), feature_trace, Z, W)
def upaintbox_sample(log_res,hold,Y,held_out,ext,sig,sig_w,iterate,K,truncate,obs_indices,limit,Z_init=None,W_init=None,data_dim=None,init=False,display=False):
    """Run the paintbox sampler with a time limit and recovery diagnostics.

    Every sweep resamples Z (``sample_Z``), the tree (``sample_pb``) and W
    (``sample_W``), records the data log-likelihood and the feature count,
    and calls ``new_feature``. On sweeps where ``it % 50 == 49`` the
    recovery log-likelihood from ``recover_paintbox`` is recorded as well.

    NOTE(review): the indentation of this function was lost in the source.
    The nesting below (per-sweep timing at loop level, recovery statistics
    inside the ``it % 50 == 49`` branch) is a reconstruction -- confirm
    against the original file.

    Args:
        log_res: ``res`` starts at 1 and is doubled while ``res < 2**log_res``.
        hold: ``res`` is only doubled on sweeps where ``it % hold == 0``.
        Y: 2-D array; its shape is unpacked as ``(N, T)``.
        held_out: 2-D array; ``held_out[:, obs_indices]`` is passed to
            ``recover_paintbox`` together with ``held_out`` itself.
        ext, truncate: forwarded to ``new_feature``.
        sig, sig_w: forwarded to the sampling helpers; ``sig_w`` is also the
            scale of the normal draw that initialises W when ``init`` is
            false.
        iterate: maximum number of sweeps.
        K: initial feature count; rebound to ``Z.shape[1]`` every sweep.
        obs_indices: column indices selecting the observed part of
            ``held_out``.
        limit: the loop stops once the summed sweep durations exceed this.
        Z_init: not read (Z is always drawn with ``draw_Z_tree``).
        W_init: initial W used when ``init`` is true; defaults to ``[]``.
        data_dim: four-element sequence; defaults to ``[3, 3, 2, 2]``. Only
            unpacked here, its values are not used.
        init: choose ``W_init`` instead of a random W.
        display: print progress every 10th sweep.

    Returns:
        ``(ll_list, iter_time, f_count, lapse_data, Z, W, prob_matrix,
        pred_ll, rec_ll, tree)``. ``iter_time`` is the cumulative sum of the
        sweep durations; ``pred_ll`` only ever receives zeros in this
        version; ``prob_matrix`` is ``None`` when no sweep ran.
    """
    # None sentinels replace the former mutable list defaults, which were
    # shared between calls.
    if W_init is None:
        W_init = []
    if data_dim is None:
        data_dim = [3, 3, 2, 2]
    small_x, small_y, big_x, big_y = data_dim

    N, T = Y.shape
    # Original author's note: generating the tree with res = 1 is technically
    # wrong, but is compensated by a hard-coded 0.5 in the tree paintbox code.
    res = 1
    tree = gen_tree(K, res)
    Z = draw_Z_tree(tree, N)
    if init:
        W = W_init
    else:
        W = np.reshape(np.random.normal(0, sig_w, K * T), (K, T))

    ll_list = []
    iter_time = []
    f_count = []
    lapse_data = []
    pred_ll = []
    rec_ll = []
    rec = 0
    # Bound up front so the return statement is valid when iterate == 0.
    prob_matrix = None
    observe = held_out[:, obs_indices]

    # The former ``for redo in range(1)`` wrapper and its ``redo == 1``
    # branch were removed: ``redo`` could only ever be 0.
    for it in range(iterate):
        if it > 0 and np.sum(iter_time) > limit:
            break
        if it % hold == 0 and res < 2**log_res:
            res = res * 2
        start = time.time()
        _, K = Z.shape

        # sample Z
        Z, prob_matrix = sample_Z(Y, Z, W, sig, tree)
        if it % 10 == 0 and display:
            print("iteration: " + str(it))
            print("Sparsity: " + str(np.sum(Z, axis=0)))
            print('recover log likelihood: ' + str(rec))

        # sample paintbox
        tree, lapse = sample_pb(Z, tree, res)
        # sample W
        W = sample_W(Y, Z, sig, sig_w)

        ll_list.append(log_data_zw(Y, Z, W, sig))
        F, _ = get_FD(tree)
        f_count.append(F)

        # recovered log likelihood
        if it % 50 == 49:
            pred_ll.append(0)  # predictive log-likelihood is disabled
            rec = recover_paintbox(held_out, observe, W, tree, sig, obs_indices)
            rec_ll.append(rec)

        # Timing stops here, so new_feature below is not included.
        iter_time.append(time.time() - start)

        # On the last sweep new_feature is told to drop only (drop=1).
        drop = 1 if it == iterate - 1 else 0
        # NOTE(review): called with an 11th argument (truncate); confirm the
        # new_feature definition in scope accepts it.
        Z, W, tree = new_feature(Y, Z, W, tree, ext, K, res, sig, sig_w, drop, truncate)
        lapse_data.append(lapse)

    iter_time = np.cumsum(iter_time)
    return (ll_list, iter_time, f_count, lapse_data, Z, W, prob_matrix,
            pred_ll, rec_ll, tree)
def sample_pb(Z,tree,res):
    """Resample every node probability of the paintbox tree.

    For level ``i`` and node ``j`` the node covers the index span
    ``[j * 2**(F-i), (j+1) * 2**(F-i) - 1]`` of the vector returned by
    ``get_vec``; the lower half is the "zero" side and the upper half the
    "one" side. Nodes whose span has no mass in ``Z_compact(Z)`` or in the
    vector are skipped. Otherwise up to three candidate probabilities
    ``k / res`` around the current one are scored with ``excise2`` and one
    is drawn with probability proportional to ``exp(score)``.

    Args:
        Z: passed to ``Z_compact``.
        tree: ``(ctree, ptree)`` pair; ``ctree`` is updated in place.
        res: candidate probabilities are multiples of ``1 / res``.

    Returns:
        ``(tree, lapse)``: the result of ``update((ctree, ptree))`` and the
        wall-clock seconds spent in the node loop.

    Raises:
        SystemExit: when every candidate of a node scored inf, NaN or -1.
    """
    F, _ = get_FD(tree)
    ctree, ptree = tree
    vec = get_vec(tree)
    compact = Z_compact(Z)

    start_pb = time.time()
    # iterate over features (rows of the paintbox)
    for i in range(F):
        span = 2**(F - i)
        half = 2**(F - i - 1)
        # iterate over nodes j in tree layer i
        for j in range(2**i):
            start_zero = j * span
            end_one = (j + 1) * span - 1
            if np.sum(compact[start_zero:end_one + 1]) == 0:
                continue
            end_zero = start_zero + half - 1
            start_one = end_zero + 1
            tot = np.sum(vec[start_zero:end_one + 1])
            if tot == 0:
                continue

            old_prob = float(np.sum(vec[start_one:end_one + 1])) / tot
            unit = float(tot) / res

            # Candidate window: the nearest grid point and its neighbours,
            # clipped to [0, res].
            center = int(round(res * old_prob))
            if center == res:
                lbound, ubound = res - 1, res
            elif center == 0:
                lbound, ubound = 0, 1
            else:
                lbound, ubound = center - 1, center + 1

            n_candidates = ubound - lbound + 1
            mat_vec = np.tile(vec, (n_candidates, 1))
            wheel = list(range(n_candidates))
            log_roulette = []
            for k in range(lbound, ubound + 1):
                mat_pos = k - lbound
                new_prob = float(k) / res
                if old_prob != new_prob:
                    if old_prob == 0:
                        # One side is empty: shrink the zero side and put
                        # the new mass on the first "one" entry.
                        ratio_zero = float((1 - new_prob)) / (1 - old_prob)
                        mat_vec[mat_pos, start_zero:end_zero + 1] = ratio_zero * mat_vec[mat_pos, start_zero:end_zero + 1]
                        mat_vec[mat_pos, start_one] = unit * k
                    elif old_prob == 1:
                        # Zero side is empty: shrink the one side and put
                        # the new mass on the last "zero" entry.
                        ratio_one = float(new_prob) / old_prob
                        mat_vec[mat_pos, start_one:end_one + 1] = ratio_one * mat_vec[mat_pos, start_one:end_one + 1]
                        mat_vec[mat_pos, end_zero] = unit * (res - k)
                    else:
                        ratio_one = float(new_prob) / old_prob
                        ratio_zero = float((1 - new_prob)) / (1 - old_prob)
                        mat_vec[mat_pos, start_one:end_one + 1] = ratio_one * mat_vec[mat_pos, start_one:end_one + 1]
                        mat_vec[mat_pos, start_zero:end_zero + 1] = ratio_zero * mat_vec[mat_pos, start_zero:end_zero + 1]

                # bottleneck line
                val = excise2(compact, mat_vec[mat_pos, :], start_zero, end_one)
                if math.isinf(val) or math.isnan(val) or val == -1:
                    wheel.remove(mat_pos)
                else:
                    log_roulette.append(val)

            if not log_roulette:
                # Previously a bare sys.exit(), which terminated silently
                # with a success status.
                sys.exit("paintbox update broken")

            # Subtract the maximum before exponentiating to avoid underflow.
            shift = max(log_roulette)
            roulette = [np.exp(lr - shift) for lr in log_roulette]
            total = np.sum(roulette)
            normal_roulette = [r / total for r in roulette]

            # The original converted the index array with int(...) and relied
            # on ``except TypeError`` for the fallback. Converting an array
            # with ndim > 0 to a scalar is deprecated in NumPy; once it
            # errors, the fallback would be taken on every draw. Check the
            # size explicitly instead.
            hits = np.where(np.random.multinomial(1, normal_roulette) == 1)[0]
            if hits.size == 1:
                chosen = wheel[int(hits[0])]
            else:
                # BEWARE (original author): this changes if you adjust the
                # exponent of res.
                chosen = 1

            vec = mat_vec[chosen, :]
            ctree[i, j] = float(chosen + lbound) / res

    lapse = time.time() - start_pb
    tree = update((ctree, ptree))
    return tree, lapse
def sample_pb(Z, tree, res):
    """Resample every node probability of the paintbox tree (full grid).

    For level ``i`` and node ``j`` the node covers the index span
    ``[j * 2**(F-i), (j+1) * 2**(F-i) - 1]`` of the vector returned by
    ``get_vec``; the lower half is the "zero" side and the upper half the
    "one" side. Nodes with zero mass are skipped. Otherwise all ``res + 1``
    candidate probabilities ``k / res`` are scored with ``excise`` and one
    is drawn with probability proportional to ``exp(score)``.

    Args:
        Z: passed to ``excise``.
        tree: ``(ctree, ptree)`` pair; ``ctree`` is updated in place.
        res: candidate probabilities are multiples of ``1 / res``.

    Returns:
        The result of ``update((ctree, ptree))``.
    """
    F, _ = get_FD(tree)
    ctree, ptree = tree
    vec = get_vec(tree)

    # iterate over features (rows of the paintbox)
    for i in range(F):
        span = 2**(F - i)
        half = 2**(F - i - 1)
        # iterate over nodes j in tree layer i
        for j in range(2**i):
            start_zero = j * span
            end_zero = start_zero + half - 1
            start_one = start_zero + half
            end_one = (j + 1) * span - 1
            tot = np.sum(vec[start_zero:end_one + 1])
            if tot == 0:
                continue

            # Binary digits of the span start, left-padded to length F.
            # map() returns an iterator on Python 3, so it must be
            # materialised before len() is taken (the original raised
            # TypeError here).
            binary = [int(ch) for ch in "{0:b}".format(int(start_zero))]
            start = np.concatenate((np.zeros(F - len(binary)), binary))

            # Built only for nodes that are actually resampled.
            mat_vec = np.tile(vec, (res + 1, 1))
            old_prob = float(np.sum(vec[start_one:end_one + 1])) / tot
            unit = float(tot) / res

            roulette = []
            for k in range(res + 1):
                new_prob = float(k) / res
                if old_prob != new_prob:
                    if old_prob == 0:
                        # One side is empty: shrink the zero side and put
                        # the new mass on the first "one" entry.
                        ratio_zero = float((1 - new_prob)) / (1 - old_prob)
                        mat_vec[k, start_zero:end_zero + 1] = ratio_zero * mat_vec[k, start_zero:end_zero + 1]
                        mat_vec[k, start_one] = unit * k
                    elif old_prob == 1:
                        # Zero side is empty: shrink the one side and put
                        # the new mass on the last "zero" entry.
                        ratio_one = float(new_prob) / old_prob
                        mat_vec[k, start_one:end_one + 1] = ratio_one * mat_vec[k, start_one:end_one + 1]
                        mat_vec[k, end_zero] = unit * (res - k)
                    else:
                        ratio_one = float(new_prob) / old_prob
                        ratio_zero = float((1 - new_prob)) / (1 - old_prob)
                        mat_vec[k, start_one:end_one + 1] = ratio_one * mat_vec[k, start_one:end_one + 1]
                        mat_vec[k, start_zero:end_zero + 1] = ratio_zero * mat_vec[k, start_zero:end_zero + 1]

                # bottleneck line
                val = excise(Z, mat_vec[k, :], start, i)
                if math.isinf(val) or math.isnan(val) or val == 0:
                    roulette.append(0.0)
                else:
                    roulette.append(np.exp(val))

            # No usable candidate: fall back to equal weights.
            if np.sum(roulette) == 0:
                roulette = 1. / res * np.ones(res + 1)
            total = np.sum(roulette)
            normal_roulette = [r / total for r in roulette]

            # The original converted the index array with int(...) and relied
            # on ``except TypeError`` for the fallback. Converting an array
            # with ndim > 0 to a scalar is deprecated in NumPy; once it
            # errors, the fallback would be taken on every draw. Check the
            # size explicitly instead.
            hits = np.where(np.random.multinomial(1, normal_roulette) == 1)[0]
            if hits.size == 1:
                chosen = int(hits[0])
            else:
                chosen = int(round(res * old_prob))

            vec = mat_vec[chosen, :]
            ctree[i, j] = float(chosen) / res

    tree = update((ctree, ptree))
    return tree