def predict_communities(self, deg_corr):
    """Fit an SBM to the co-occurrence graph and collect MCMC marginals.

    Minimizes the description length to get an initial block state (using
    an edge-covariate layered state when the graph is weighted), then runs
    ``gt.mcmc_equilibrate`` for ``self.n_iters`` sweeps, accumulating vertex
    and edge marginals, the description-length history, and a histogram of
    the number of non-empty blocks.  Results are stored per ``deg_corr``
    value in ``self.state_``, ``self.S_mf_``, ``self.S_bethe_`` and
    ``self.L_``.

    Parameters
    ----------
    deg_corr : bool
        Whether to use the degree-corrected variant of the SBM; also used
        as the key under which all results are stored.
    """
    if self.is_weighted:
        # Weighted graph: treat edge weights as an edge covariate via a
        # layered state (layers=False in state_args -> covariates, not
        # independent layers).
        state = gt.minimize_blockmodel_dl(
            self.coocurence_graph,
            overlap=self.allow_overlap,
            deg_corr=deg_corr,
            layers=True,
            state_args=dict(ec=self.weights, layers=False))
    else:
        state = gt.minimize_blockmodel_dl(
            self.coocurence_graph,
            overlap=self.allow_overlap,
            deg_corr=deg_corr)

    # Allow the MCMC to explore up to one block per vertex.
    state = state.copy(B=self.coocurence_graph.num_vertices())

    self.dls_[deg_corr] = []    # description length history
    self.vm_[deg_corr] = None   # vertex marginals
    self.em_[deg_corr] = None   # edge marginals
    # Histogram over the number of non-empty blocks B (index B gets +1
    # per sweep); size num_vertices()+1 so B == num_vertices() fits.
    self.h_[deg_corr] = np.zeros(self.coocurence_graph.num_vertices() + 1)

    def collect_marginals(s):
        # Closure over `self` and `deg_corr`; called once per sweep by
        # mcmc_equilibrate with the current block state `s`.
        self.vm_[deg_corr] = s.collect_vertex_marginals(self.vm_[deg_corr])
        self.em_[deg_corr] = s.collect_edge_marginals(self.em_[deg_corr])
        self.dls_[deg_corr].append(s.entropy())
        B = s.get_nonempty_B()
        self.h_[deg_corr][B] += 1

    # Collect the marginal distributions over self.n_iters sweeps.
    gt.mcmc_equilibrate(state,
                        force_niter=self.n_iters,
                        mcmc_args=dict(niter=self.n_init_iters),
                        callback=collect_marginals,
                        **self.equlibrate_options)

    # Model-evidence estimates: mean-field and Bethe entropy corrections
    # to the negative mean description length.
    S_mf = gt.mf_entropy(self.coocurence_graph, self.vm_[deg_corr])
    S_bethe = gt.bethe_entropy(self.coocurence_graph, self.em_[deg_corr])[0]
    L = -np.mean(self.dls_[deg_corr])

    self.state_[deg_corr] = copy.copy(state)
    self.S_bethe_[deg_corr] = copy.copy(S_bethe)
    self.S_mf_[deg_corr] = copy.copy(S_mf)
    self.L_[deg_corr] = copy.copy(L)

    if self.verbose:
        print(("Model evidence for deg_corr = %s:" % deg_corr,
               L + S_mf, "(mean field),", L + S_bethe, "(Bethe)"))
def collect_marginals(s):
    """Per-sweep callback: accumulate vertex/edge marginals and entropy.

    Mutates the module-level accumulators ``pv``, ``pe`` and ``dls``;
    ``s`` is the current block state passed in by ``gt.mcmc_equilibrate``.
    """
    global pv, pe
    # Relabel blocks with a perfect hash so marginals are collected under
    # consistent labels across sweeps.
    b = gt.perfect_prop_hash([s.b])[0]
    pv = s.collect_vertex_marginals(pv, b=b)
    pe = s.collect_edge_marginals(pe)
    dls.append(s.entropy())  # description-length history


# Apply MCMC
gt.mcmc_equilibrate(state, force_niter=10000, mcmc_args=dict(niter=10),
                    callback=collect_marginals)

entropy = state.entropy()
# Mean-field and Bethe entropy corrections for the model-evidence estimate.
S_mf = gt.mf_entropy(g, pv)
S_bethe = gt.bethe_entropy(g, pe)[0]
L = -np.mean(dls)
# Number of distinct block labels in the final partition.
nClass = len(np.unique(state.get_blocks().a))
print("%d classes, entropy %f, mean_field %f, bethe %f"
      % (nClass, entropy, L + S_mf, L + S_bethe))

# Save final graph
fname = "SBM_mcmc_%d_%f" % (nClass, entropy)
write_classes(os.path.join(outdir, fname + ".tsv"), g, state)
# NOTE(review): the file handle from open(...) is never closed explicitly;
# consider a `with` block — confirm before changing.
pickle.dump([g, state, dls, pv, pe],
            open(os.path.join(outdir, fname + ".pickle"), "wb"), -1)
g.save(os.path.join(outdir, fname + ".gt.gz"))

# Find the maximal nClass over all mcmc sweeps
nClassMax = 0
for v in g.vertices():