def bipartite_op_gen_zipf(g, params):
    """Generate a bipartite graph whose two levels follow Zipf degree laws.

    Args:
        g: Not used by this operation.
        params: Sequence of exactly eight values, converted in this order:
            ``alpha2`` (float), ``N2``, ``xmin2``, ``n2`` (ints),
            ``alpha3`` (float), ``N3``, ``xmin3``, ``n3`` (ints).
            ``(alpha, N, xmin)`` are handed to ``Zipf(...)`` for each level.
            A non-positive ``n2`` or ``n3`` means "undefined": it is derived
            from the other one so that both levels have the same expected
            total degree. Presumably n2/n3 are the vertex counts of each
            level -- confirm against ``random_matching_degrees``.

    Returns:
        Whatever ``bipartite_cm(degrees2, degrees3)`` returns for the degree
        sequences drawn by ``random_matching_degrees``.
    """
    if len(params) != 8:
        error('invalid number of arguments for "genzipf"')

    zipf_alpha2 = float(params[0])
    zipf_N2 = int(params[1])
    zipf_xmin2 = int(params[2])
    n2 = int(params[3])
    zipf_alpha3 = float(params[4])
    zipf_N3 = int(params[5])
    zipf_xmin3 = int(params[6])
    n3 = int(params[7])

    z2 = Zipf(zipf_alpha2, zipf_N2, zipf_xmin2)
    z3 = Zipf(zipf_alpha3, zipf_N3, zipf_xmin3)
    z2_exp = z2.expectation()
    z3_exp = z3.expectation()
    info(" L2 distrib expectation = %.20f" % (z2_exp))
    info(" L3 distrib expectation = %.20f" % (z3_exp))

    # Only the "both undefined" case is an error. When both sizes are given
    # explicitly they are used as-is; the expected totals logged below let
    # the user see how well the two levels match.
    if (n2 <= 0) and (n3 <= 0):
        error(" n2 and n3 cannot both be undefined")
    elif n2 <= 0:
        # Balance the expected total degree: n2 * E[L2] ~= n3 * E[L3].
        n2 = int(round((n3 * z3_exp) / z2_exp))
        info(" n2 auto-computed = %d" % (n2))
    elif n3 <= 0:
        n3 = int(round((n2 * z2_exp) / z3_exp))
        info(" n3 auto-computed = %d" % (n3))

    info(" a2=%f, N2=%d, xmin2=%d, n2=%d" % (zipf_alpha2, zipf_N2, zipf_xmin2, n2))
    info(" a3=%f, N3=%d, xmin3=%d, n3=%d" % (zipf_alpha3, zipf_N3, zipf_xmin3, n3))

    # Check expected number of edges at level 2
    # against expected number of edges at level 3
    l2_exp = z2_exp * n2
    l3_exp = z3_exp * n3
    info(" Expected total L2 degree = %.20f" % (l2_exp))
    info(" Expected total L3 degree = %.20f" % (l3_exp))

    (degrees2, degrees3) = random_matching_degrees(z2, n2, z3, n3)
    # Re-estimate the exponents from the drawn sequences as a sanity check.
    log(1, " MLE L2 = %f" % (Zipf.mle(degrees2, zipf_xmin2)))
    log(1, " MLE L3 = %f" % (Zipf.mle(degrees3, zipf_xmin3)))
    return bipartite_cm(degrees2, degrees3)
def _bipartite_op_stats_flat(g, output=None):
    """Log statistics of ``g`` as a plain graph and plot its degrees.

    Logs vertex/edge counts, the highest degree, connectivity information
    and the exponent returned by ``Zipf.mle`` for the degree sequence.

    Args:
        g: Graph object providing ``vcount``, ``ecount``, ``degree``,
            ``maxdegree``, ``is_connected`` and ``clusters``.
        output: When truthy, used as the suffix of the two image files that
            are written (``degree-hist-<output>`` and
            ``degree-dist-loglog-<output>``). Otherwise both plots are drawn
            side by side in one figure and shown interactively.

    Returns:
        ``g``, unchanged.
    """
    # Size of the graph
    info(" Number of vertices: %d" % (g.vcount()))
    info(" Number of edges: %d" % (g.ecount()))
    degrees = g.degree()
    max_degree = g.maxdegree()
    info(" Highest degree: %d" % (max_degree))

    # Connectivity: component details are only reported for a split graph
    if not g.is_connected():
        info(" Graph is not connected")
        components = g.clusters()
        info(" Number of connected components = %d" % (len(components)))
        largest = components.giant()
        info(" Size of largest component = %d" % (largest.vcount()))
    else:
        info(" Graph is connected")

    # Power-law exponent estimate
    alpha = Zipf.mle(degrees)
    info(" Maximum Likelihood Estimator:")
    info(" alpha (xm=1): %.20f" % (alpha))

    # Interactive mode: one figure, two panels
    if not output:
        plt.figure()
        left_axes = plt.subplot(1, 2, 1)
        _plot_degree_hist(left_axes, degrees, max_degree)
        right_axes = plt.subplot(1, 2, 2)
        _plot_degree_dist_loglog(right_axes, degrees, max_degree)
        plt.show()
        return g

    # File mode: one figure per plot, each saved under its own name
    saved_plots = (
        ("degree-hist-%s", _plot_degree_hist),
        ("degree-dist-loglog-%s", _plot_degree_dist_loglog),
    )
    for name_template, draw in saved_plots:
        figure = plt.figure()
        axes = plt.subplot(1, 1, 1)
        draw(axes, degrees, max_degree)
        filename = name_template % (output)
        info(' Create "%s"' % (filename))
        figure.savefig(filename)
    return g
def _bipartite_op_stats_bipartite(g, output=None):
    """Log per-level statistics of a bipartite graph and plot its degrees.

    Vertices whose ``"type"`` attribute is falsy are counted as L2, the
    others as L3. For each level the function logs vertex count and highest
    degree, then logs connectivity information and exponent estimates
    (``Zipf.mle``; for L2 one estimate per xmin in 1..3). L3 vertices of
    degree 0 are excluded from the L3 degree sequence.

    Args:
        g: Graph object providing ``vs["type"]``, ``vcount``, ``ecount``,
            ``degree``, ``maxdegree``, ``is_connected`` and ``clusters``.
        output: When truthy, used as the suffix of the four image files that
            are written (``l2-degree-hist-``, ``l2-degree-dist-loglog-``,
            ``l3-degree-hist-``, ``l3-degree-dist-loglog-``). Otherwise the
            four plots are drawn in a 2x2 grid and shown interactively.

    Returns:
        ``g``, unchanged.
    """
    # Vertex/edge count
    # Read the "type" attribute once instead of once per vertex.
    vertex_types = g.vs["type"]
    l2_vertices = [idx for idx, is_l3 in enumerate(vertex_types) if not is_l3]
    l3_vertices = [idx for idx, is_l3 in enumerate(vertex_types) if is_l3]
    info(" Number of L2 vertices: %d" % (len(l2_vertices)))
    info(" Number of L3 vertices: %d" % (len(l3_vertices)))
    info(" Number of edges: %d" % (g.ecount()))

    l2_degrees = g.degree(l2_vertices)
    l3_degrees = [degree for degree in g.degree(l3_vertices) if degree > 0]
    max_l2_degree = g.maxdegree(l2_vertices)
    max_l3_degree = g.maxdegree(l3_vertices)
    # Both levels are plotted against the same upper bound.
    max_degree = max(max_l2_degree, max_l3_degree)
    info(" Highest L2 degree: %d" % (max_l2_degree))
    info(" Highest L3 degree: %d" % (max_l3_degree))

    # Connectedness
    if g.is_connected():
        info(" Graph is connected")
    else:
        info(" Graph is not connected")
        clusters = g.clusters()
        info(" Number of connected components = %d" % (len(clusters)))
        giant = clusters.giant()
        info(" Size of largest component = %d" % (giant.vcount()))

    # L2/L3 power law exponents estimators
    info(" Maximum Likelihood Estimator:")
    l2_xmin_range = range(1, 4)
    l2_mle = _estimate_pareto_params(l2_degrees, l2_xmin_range)
    for xmin in l2_xmin_range:
        info(" alpha (L2, xm=%d): %.20f" % (xmin, l2_mle[xmin]))
    l3_mle = Zipf.mle(l3_degrees)
    info(" alpha (L3, xm=1): %.20f" % (l3_mle))

    # L2/L3 degrees distributions
    if output:
        # L2 histogram with the fitted curves overlaid
        fig = plt.figure()
        ax = plt.subplot(1, 1, 1)
        _plot_degree_hist(ax, l2_degrees, max_degree)
        for xmin in l2_xmin_range:
            _plot_pareto_pdf(ax, l2_mle[xmin], xmin, l2_degrees, max_degree, hist=False)
        filename = "l2-degree-hist-%s" % (output)
        info(' Create "%s"' % (filename))
        fig.savefig(filename)

        # L2 log-log distribution with the fitted curves overlaid
        fig = plt.figure()
        ax = plt.subplot(1, 1, 1)
        _plot_degree_dist_loglog(ax, l2_degrees, max_degree)
        for xmin in l2_xmin_range:
            _plot_pareto_pdf(ax, l2_mle[xmin], xmin, l2_degrees, max_degree)
        filename = "l2-degree-dist-loglog-%s" % (output)
        info(' Create "%s"' % (filename))
        fig.savefig(filename)

        # L3 histogram with the fitted curve overlaid
        fig = plt.figure()
        ax = plt.subplot(1, 1, 1)
        _plot_degree_hist(ax, l3_degrees, max_degree)
        _plot_pareto_pdf(ax, l3_mle, 1, l3_degrees, max_degree, hist=False)
        filename = "l3-degree-hist-%s" % (output)
        info(' Create "%s"' % (filename))
        fig.savefig(filename)

        # L3 log-log distribution with the fitted curve overlaid
        fig = plt.figure()
        ax = plt.subplot(1, 1, 1)
        _plot_degree_dist_loglog(ax, l3_degrees, max_degree)
        _plot_pareto_pdf(ax, l3_mle, 1, l3_degrees, max_degree)
        filename = "l3-degree-dist-loglog-%s" % (output)
        info(' Create "%s"' % (filename))
        fig.savefig(filename)
    else:
        # Interactive mode: L2 on the top row, L3 on the bottom row.
        plt.figure()
        ax = plt.subplot(2, 2, 1)
        _plot_degree_hist(ax, l2_degrees, max_degree)
        ax = plt.subplot(2, 2, 2)
        _plot_degree_dist_loglog(ax, l2_degrees, max_degree)
        for xmin in l2_xmin_range:
            _plot_pareto_pdf(ax, l2_mle[xmin], xmin, l2_degrees, max_degree)
        ax = plt.subplot(2, 2, 3)
        _plot_degree_hist(ax, l3_degrees, max_degree)
        ax = plt.subplot(2, 2, 4)
        _plot_degree_dist_loglog(ax, l3_degrees, max_degree)
        _plot_pareto_pdf(ax, l3_mle, 1, l3_degrees, max_degree)
        plt.show()
    return g
def _estimate_pareto_params(degrees, beta_range): alphas = {} for beta in beta_range: # alphas[beta]= Pareto.mle(degrees, beta) alphas[beta] = Zipf.mle(degrees, beta) return alphas