import copy
import random

import numpy as np

# The classes and helpers used below (DirichletPrior, NormalDistribution,
# ProductDistribution, NormalGammaPrior, MixtureModelPrior, BayesMixtureModel,
# DiscreteDistribution, MixtureModel, CandidateGroup, setPartitions, mixextend,
# sym_kl_dist, get_random_pi, matrix_sum_logs) are assumed to be provided by
# the enclosing mixture module (PyMix).


def getRandomCSIMixture_conditionalDists(G, p, KL_lower, KL_upper, M=8, dtypes='discgauss', seed=None, fullstruct=False, disc_sampling_dist=None):

    #    if seed:
    #        random.seed(seed)
    #        mixextend.set_gsl_rng_seed(seed)
    #        #print '*** seed=',seed
    #
    #    else: # XXX debug
    #        seed = random.randint(1,9999999)
    #        mixextend.set_gsl_rng_seed(seed)
    #        random.seed(seed)
    #        #print '*** seed=',seed

    if disc_sampling_dist is None:
        discSamp = DirichletPrior(M, [1.0] * M)  # uniform sampling
    else:
        discSamp = disc_sampling_dist

    min_sigma = 0.3    # minimal std for Normal
    max_sigma = 5.0    # maximal std for Normal
    min_mu = -25.0     # minimal mean
    max_mu = 25.0      # maximal mean

    assert dtypes in ['disc', 'gauss', 'discgauss']

    if dtypes == 'disc':
        featureTypes = [0] * p
    elif dtypes == 'gauss':
        featureTypes = [1] * p
    elif dtypes == 'discgauss':
        # discrete or Normal features for now, chosen uniformly
        # 0 discrete, 1 Normal
        featureTypes = [random.choice((0, 1)) for i in range(p)]
    else:
        raise TypeError
    #print featureTypes

    # generate random CSI structures
    if G < 15:
        P = setPartitions.generate_all_partitions(G)  # XXX too slow for large G
    #print P

    C = []
    leaders = []
    groups = []
    for j in range(p):
        c_j = {}
        leaders_j = []
        groups_j = {}

        if fullstruct:
            struct_j = [(i,) for i in range(G)]
        elif G < 15:
            struct_j = random.choice(P)
        else:
            print 'WARNING: improper structure sampling !'
            struct_j = setPartitions.get_random_partition(G)

        #print '\nstruct',j,struct_j

        for i, grp in enumerate(struct_j):
            lg = list(grp)
            #print lg
            lgj = lg.pop(0)
            #print lgj
            leaders_j.append(lgj)
            groups_j[lgj] = lg

            max_tries = 100000
            tries = 0

            if featureTypes[j] == 0:
                # rejection sampling: draw discrete candidates until the
                # symmetric KL divergence to all previously assigned
                # distributions of this feature lies in [KL_lower, KL_upper]
                acc = 0
                while acc == 0:
                    cand = discSamp.sample()
                    #print 'Cand:', cand
                    acc = 1
                    for d in c_j:
                        KL_dist = sym_kl_dist(c_j[d], cand)
                        #print c_j[d],cand, KL_dist
                        if KL_dist > KL_upper or KL_dist < KL_lower:
                            acc = 0
                            tries += 1
                            break
                    if tries >= max_tries:
                        raise RuntimeError, 'Failed to find separated parameters !'
                for cind in grp:
                    c_j[cind] = cand

            elif featureTypes[j] == 1:
                # same rejection sampling for Normal candidates
                acc = 0
                while acc == 0:
                    mu = random.uniform(min_mu, max_mu)
                    sigma = random.uniform(min_sigma, max_sigma)
                    cand = NormalDistribution(mu, sigma)
                    acc = 1
                    for d in c_j:
                        KL_dist = sym_kl_dist(c_j[d], cand)
                        if KL_dist > KL_upper or KL_dist < KL_lower:
                            acc = 0
                            tries += 1
                            break
                    if tries >= max_tries:
                        raise RuntimeError
                #    print '.',
                #print
                for cind in grp:
                    c_j[cind] = cand
            else:
                raise RuntimeError

        leaders.append(leaders_j)
        groups.append(groups_j)
        C.append(c_j)

    comps = []
    for i in range(G):
        comps.append(ProductDistribution([C[j][i] for j in range(p)]))

    pi = get_random_pi(G, 0.3 / G)
    #print '** pi =',pi

    # create prior
    piprior = DirichletPrior(G, [2.0] * G)

    cprior = []
    for j in range(p):
        if featureTypes[j] == 0:
            cprior.append(DirichletPrior(M, [1.02] * M))
        elif featureTypes[j] == 1:
            cprior.append(NormalGammaPrior(0, 0, 0, 0))  # dummy parameters, to be set later
        else:
            raise RuntimeError

    mprior = MixtureModelPrior(0.1, 0.1, piprior, cprior)

    m = BayesMixtureModel(G, pi, comps, mprior, struct=1)
    m.leaders = leaders
    m.groups = groups

    m.identifiable()
    m.updateFreeParams()
    #print m

    return m
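# A minimal usage sketch for the generator above (hypothetical parameter
# values; `sampleDataSet` is assumed from the PyMix MixtureModel API):
#
#    # random 3-component CSI mixture over 4 features, where the symmetric
#    # KL divergence between any two component parameterizations of a
#    # feature is constrained to [0.1, 10.0]
#    m = getRandomCSIMixture_conditionalDists(3, 4, 0.1, 10.0, M=4)
#    data = m.sampleDataSet(500)  # draw 500 samples from the random model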
def updateStructureBayesianFullEnumeration(model, data, objFunction='MAP', silent=1):
    """
    CSI structure learning with full enumeration of the structure space.

    @param model: BayesMixtureModel object
    @param data: DataSet object
    @param objFunction: objective function of the optimization, only 'MAP' so far
    @param silent: silence flag; default is 1 (silent)
    """
    P = setPartitions.generate_all_partitions(model.G, order='reverse')  # XXX too slow for large G
    max_ind = len(P) - 1

    # odometer-style counter holding one partition index per feature;
    # position 0 ticks fastest and overflows carry into higher positions
    curr_indices = [0] * model.dist_nr
    curr_indices[0] = -1
    lpos = model.dist_nr - 1
    nr = 1
    term = 0

    prev_indices = [-1] * model.dist_nr

    # initial structure is full CSI matrix
    best_structure = [[(i,) for i in range(model.G)]] * model.dist_nr  # [ tuple(range(model.G)) ] * model.dist_nr

    # building data likelihood factor matrix for the current group structure
    l = np.zeros((model.dist_nr, model.G, data.N), dtype='Float64')
    for j in range(model.dist_nr):
        for lead_j in range(model.G):
            l_row = model.components[lead_j][j].pdf(data.getInternalFeature(j))
            l[j, lead_j, :] = l_row

    # g is the matrix of log posterior probabilities of the components given the data
    g = np.sum(l, axis=0)
    for k in range(model.G):
        g[k, :] += np.log(model.pi[k])

    sum_logs = matrix_sum_logs(g)
    g_norm = g - sum_logs
    tau = np.exp(g_norm)

    if not silent:
        print "\ntau="
        for tt in tau:
            print tt.tolist()
        print

    # computing posterior as model selection criterion
    temp = DiscreteDistribution(model.G, model.pi)
    pi_prior = model.prior.piPrior.pdf(temp)
    log_prior = pi_prior
    log_prior_list = [0.0] * model.dist_nr
    for j in range(model.dist_nr):
        for r in range(model.G):
            log_prior_list[j] += model.prior.compPrior[j].pdf(model.components[r][j])

    #    log_prior += sum(log_prior_list)
    #
    #    # prior over number of components
    #    log_prior += model.prior.nrCompPrior * model.G
    #    # prior over number of distinct groups
    #    for j in range(model.dist_nr):
    #        log_prior += model.prior.structPrior * len(model.leaders[j])
    #
    #    # get posterior
    #    lk = np.sum(sum_logs)
    #    best_post = lk + log_prior
    #    if not silent:
    #        print best_structure,':'
    #        print "0: ", lk ,"+", log_prior,"=", best_post
    #        #print log_prior_list

    best_post = float('-inf')

    # initialize merge histories
    L = [{} for j in range(model.dist_nr)]
    for j in range(model.dist_nr):
        # extracting current feature from the DataSet
        if isinstance(model.components[0][j], MixtureModel):  # XXX
            data_j = data.singleFeatureSubset(j)
        else:
            data_j = data.getInternalFeature(j)

        for lead in range(model.G):  # every component is a leader for initialization
            el_dist = copy.copy(model.components[lead][j])
            tau_pool = copy.copy(tau[lead, :])
            pi_pool = model.pi[lead]

            if objFunction == 'MAP':
                model.prior.compPrior[j].mapMStep(el_dist, tau_pool, data_j, pi_pool)
            else:
                # should never get here...
                raise TypeError

            stat = el_dist.sufficientStatistics(tau_pool, data_j)
            M = CandidateGroup(el_dist, np.sum(tau_pool), pi_pool, stat)

            l_row = el_dist.pdf(data_j)
            M.l = l_row
            M.dist_prior = model.prior.compPrior[j].pdf(el_dist)

            L[j][(lead,)] = M

    g_wo_j = np.zeros((model.G, data.N), dtype='Float64')

    best_indices = copy.copy(curr_indices)
    while 1:
        if not silent:
            print '\n----------------------------------'

        # advance the odometer counter; carry on overflow
        curr_indices[0] += 1
        if curr_indices[0] > max_ind:
            #curr_indices[lpos] = 0
            for e in range(model.dist_nr):
                if e == model.dist_nr - 1:
                    if curr_indices[e] > max_ind:
                        term = 1
                        break
                if curr_indices[e] > max_ind:
                    curr_indices[e] = 0
                    curr_indices[e + 1] += 1
        if term:
            break

        #print '\nprev:',prev_indices
        if not silent:
            print nr, ':', curr_indices, '->', [P[jj] for jj in curr_indices]

        g_wo_prev = copy.copy(g)
        g_this_struct = np.zeros((model.G, data.N))
        for j in range(model.dist_nr):
            if prev_indices[j] == curr_indices[j]:
                #print '  -> unchanged',j,curr_indices[j], P[curr_indices[j]]
                # by the odometer property all higher positions are unchanged too
                break
            else:
                #print '\n--------\nChanged',j,curr_indices[j], P[curr_indices[j]]
                curr_struct_j = P[curr_indices[j]]

                # unnormalized posterior matrix without the contribution of the jth feature
                try:
                    g_wo_prev = g_wo_prev - l[j]
                except FloatingPointError:
                    # if there was an exception we have to compute each
                    # entry in g_wo_j seperately to set -inf - -inf = -inf
                    g_wo_prev = mixextend.substract_matrix(g_wo_prev, l[j])

                # extracting current feature from the DataSet
                if isinstance(model.components[0][j], MixtureModel):  # XXX
                    data_j = data.singleFeatureSubset(j)
                else:
                    data_j = data.getInternalFeature(j)

                l_j_1 = np.zeros((model.G, data.N))  # XXX needs only be done once

                #print '\n\n***', curr_struct_j
                for cs_j in curr_struct_j:
                    #print '  ->',cs_j
                    if cs_j in L[j]:
                        #print '    ** REcomp',cs_j
                        # retrieve merge data from history
                        candidate_dist = L[j][cs_j].dist
                        if not silent:
                            print j, " R candidate:", cs_j, candidate_dist
                        l_row = L[j][cs_j].l
                        #cdist_prior = L[j][cs_j].dist_prior
                    else:
                        #print '    ** comp',cs_j
                        M = model.prior.compPrior[j].mapMStepMerge([L[j][(c,)] for c in cs_j])
                        #print '\n  *** compute:',hist_ind1,hist_ind2
                        candidate_dist = M.dist
                        if not silent:
                            print j, " C candidate:", cs_j, candidate_dist

                        l_row = candidate_dist.pdf(data_j)
                        #print '    l_row=',l_row
                        M.l = l_row
                        M.dist_prior = model.prior.compPrior[j].pdf(candidate_dist)

                        L[j][cs_j] = M

                    for c in cs_j:
                        l_j_1[c, :] = l_row
                        #print '  ->',c
                        #g_this_struct[c,:] += l_row

                g_this_struct += l_j_1
                l[j] = l_j_1

                # compute parameter prior for the candidate merge parameters
                log_prior_list_j = 0.0
                for r in curr_struct_j:
                    log_prior_list_j += L[j][r].dist_prior * len(r)
                log_prior_list[j] = log_prior_list_j

        # get updated unnormalized posterior matrix
        g_1 = g_wo_prev + g_this_struct

        #        print '\ng_wo_j:'
        #        for gg in g_wo_j:
        #            print gg.tolist()
        #
        #        print '\nl_j_1:'
        #        for gg in l_j_1:
        #            print gg.tolist()
        #
        #        print '\ng_1:'
        #        for gg in g_1:
        #            print gg.tolist()

        sum_logs = matrix_sum_logs(g_1)
        lk_1 = np.sum(sum_logs)
        #print '\n  *** likelihood =', lk_1

        # computing posterior as model selection criterion
        log_prior_1 = pi_prior

        #print r, L[r].dist_prior * len(r),L[r].dist_prior
        #print '\nlog_prior_list_j =',log_prior_list_j
        #print 'log_prior_list',log_prior_list

        log_prior_1 += sum(log_prior_list)
        #print '2:',log_prior_1

        # prior over number of components
        log_prior_1 += model.prior.nrCompPrior * model.G
        # prior over number of distinct groups
        for z in range(model.dist_nr):
            log_prior_1 += model.prior.structPrior * len(P[curr_indices[z]])
        #print '3:',log_prior_1

        post_1 = lk_1 + log_prior_1
        if not silent:
            print '\nPosterior:', post_1, '=', lk_1, '+', log_prior_1

        if post_1 >= best_post:  # current candidate structure is better than previous best
            if not silent:
                print "*** New best candidate", post_1, ">=", best_post
            if post_1 == best_post:
                print '******* Identical maxima !'
                print 'current:', curr_indices
                print 'best:', best_indices
            best_indices = copy.copy(curr_indices)
            best_post = post_1

        nr += 1
        g = g_1  # set likelihood matrix for the next candidate structure

        # XXX DEBUG XXX
        #if nr > 500:
        #    term = 1

        prev_indices = copy.copy(curr_indices)

    # setting updated structure in model
    for j in range(model.dist_nr):
        lead = []
        groups = {}
        #print j,best_indices[j]
        best_partition = P[best_indices[j]]
        for gr in best_partition:
            gr_list = list(gr)
            gr_lead = gr_list.pop(0)
            lead.append(gr_lead)
            groups[gr_lead] = gr_list

            # assigning distributions according to new structure
            model.components[gr_lead][j] = L[j][gr].dist
            for d in gr_list:
                model.components[d][j] = model.components[gr_lead][j]

        model.leaders[j] = lead
        model.groups[j] = groups

    #    print '** G=',model.G
    #    print '** p=',model.dist_nr
    #    print '** nr =',nr

    if not silent:
        print '\n*** Globally optimal structure out of', nr, 'possible:'
        print [P[best_indices[j]] for j in range(model.dist_nr)]
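# The counter logic above enumerates every assignment of one partition of the
# G components to each of the dist_nr features, i.e. len(P) ** dist_nr
# candidate structures, by treating curr_indices as an odometer: position 0
# ticks fastest and overflows carry into higher positions. Below is a
# self-contained sketch of just that enumeration (illustration only, not part
# of the library API; the name is hypothetical):
def _enumerateStructureIndices(nr_partitions, dist_nr):
    curr = [0] * dist_nr
    curr[0] = -1
    while 1:
        curr[0] += 1  # tick the fastest position
        for e in range(dist_nr):
            if curr[e] >= nr_partitions:
                if e == dist_nr - 1:
                    return  # last position overflowed: all candidates visited
                curr[e] = 0  # carry into the next position
                curr[e + 1] += 1
        yield list(curr)

# e.g. list(_enumerateStructureIndices(2, 2)) yields
# [[0, 0], [1, 0], [0, 1], [1, 1]]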
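# End-to-end sketch tying both functions together (hypothetical values;
# `sampleDataSet` and `modelInitialization` are assumed from the PyMix
# MixtureModel API):
#
#    gen = getRandomCSIMixture_conditionalDists(3, 4, 0.1, 10.0, M=4)
#    data = gen.sampleDataSet(500)
#    m = copy.copy(gen)
#    m.modelInitialization(data)  # random restart of the parameters
#    updateStructureBayesianFullEnumeration(m, data, objFunction='MAP', silent=0)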