def draw_cdf(self, sequence, fig_ax=None, title=None, legend_label=None,
             x_label="Degree", y_label="CDF", style='b-', marker='o',
             is_log=True):
    """Draw the empirical CDF of ``sequence`` with ``powerlaw.plot_cdf``.

    Args:
        sequence: Data handed to ``powerlaw.plot_cdf``.
        fig_ax: Optional ``(figure, axes)`` pair to draw into.  When falsy, a
            new pair is created with ``plt.subplots(figsize=figsize)``.
        title: Optional axes title.
        legend_label: Optional curve label; a legend is only drawn when set.
        x_label: Label of the x axis.
        y_label: Label of the y axis.
        style: Style string; only its first character is used, as the color.
        marker: Marker passed through to ``powerlaw.plot_cdf``.
        is_log: Currently unused; kept so existing callers keep working.

    Returns:
        The ``(figure, axes)`` pair that was drawn into.
    """
    if not fig_ax:
        fig_ax = plt.subplots(figsize=figsize)
    fig, ax = fig_ax[0], fig_ax[1]

    powerlaw.plot_cdf(sequence, linewidth=linewidth, marker=marker,
                      color=style[0], ax=ax, label=legend_label)
    ax.grid(True)
    if title:
        ax.set_title(title, fontsize=title_fontsize)
    ax.set_xlabel(x_label, fontsize=label_fontsize)
    ax.set_ylabel(y_label, fontsize=label_fontsize)
    # Set the scale on the axes we drew into.  ``plt.yscale`` would act on
    # pyplot's *current* axes, which need not be the one the caller passed in.
    ax.set_yscale('linear')
    if legend_label:
        ax.legend(fontsize=legend_fontsize)
    # NOTE(review): ``size`` controls the tick length; if the tick *label*
    # font size was intended this should be ``labelsize`` -- confirm.
    ax.tick_params(size=tick_fontsize)
    fig.tight_layout()
    return fig_ax
def plot_entropy_ccdf():
    """Plot the CCDF and the CDF of the stored normalized entropy values.

    The values are read from 'output/normalized_entropy.obj'; each curve is
    drawn on its own figure and saved as a PDF under 'output/'.
    """
    def finish(axis, y_label, target):
        # Shared decoration and export; the pyplot calls act on the figure
        # that was created last, i.e. the one owning ``axis``.
        axis.set_xlabel("Normalized entropy e")
        axis.set_ylabel(y_label)
        plt.legend(fancybox=True, loc='lower left', ncol=1, prop={'size': 5})
        plt.tight_layout()
        plt.savefig(target)

    values = read_pickle('output/normalized_entropy.obj')

    # Complementary CDF.
    ccdf_axis = plt.figure().add_subplot(111)
    powerlaw.plot_ccdf(values, ccdf_axis, label='normalized entropy')
    finish(ccdf_axis, "Pr(X>=e)",
           'output/normalized_entropy_distribution_ccdf.pdf')

    # Plain CDF, drawn in red.
    cdf_axis = plt.figure().add_subplot(111)
    powerlaw.plot_cdf(values, cdf_axis, label='normalized entropy', color='r')
    finish(cdf_axis, "Pr(X<=e)",
           'output/normalized_entropy_distribution_cdf.pdf')
def plot_entropy_ccdf():
    """Save CCDF and CDF plots of the pickled normalized entropy as PDFs."""
    entropy = read_pickle('output/normalized_entropy.obj')

    # One entry per output figure:
    # (plot function, extra plot kwargs, y-axis label, output path).
    panels = (
        (powerlaw.plot_ccdf, {}, "Pr(X>=e)",
         'output/normalized_entropy_distribution_ccdf.pdf'),
        (powerlaw.plot_cdf, {'color': 'r'}, "Pr(X<=e)",
         'output/normalized_entropy_distribution_cdf.pdf'),
    )

    for draw, extra, y_label, out_path in panels:
        figure = plt.figure()
        axes = figure.add_subplot(111)
        draw(entropy, axes, label='normalized entropy', **extra)
        axes.set_xlabel("Normalized entropy e")
        axes.set_ylabel(y_label)
        plt.legend(fancybox=True, loc='lower left', ncol=1, prop={'size': 5})
        plt.tight_layout()
        plt.savefig(out_path)
# Plot the cumulative degree distribution of the graph stored in
# 'as-22july06.mtx' and save it as a PNG.
import scipy.io
import networkx as nx
import matplotlib.pyplot as plt
import powerlaw

sparse_mat = scipy.io.mmread('as-22july06.mtx')

# ``from_scipy_sparse_matrix`` was removed in networkx 3.0.  Prefer its
# replacement when it exists and fall back to the old name on older releases.
_from_sparse = (getattr(nx, 'from_scipy_sparse_array', None)
                or nx.from_scipy_sparse_matrix)
G = _from_sparse(sparse_mat)

degree_sequence = sorted([degree for _, degree in G.degree()], reverse=True)
powerlaw.plot_cdf(degree_sequence)
# plt.show()
plt.savefig("cumulative-degree-distribution.png")
if __name__ == '__main__':
    # Read the count table for one language, plot its CDF/PDF and fit a
    # discrete power law to the positive counts.
    estimate_dir = "/home/valentin/Desktop/Thesis II/Zipf Error/Estimates"
    lang = "NO"
    estimate_file = lang + "_ToktokTokenizer_ArticleSplitter"

    reader = TableReader(estimate_dir + "/" + estimate_file, [str, int, int])
    data = reader.read_data()

    counts = data["count"]
    # Only strictly positive counts are passed to the PDF plot and the fit.
    pos_counts = [c for c in counts if c > 0]

    print(counts[:10])
    print(min(counts))

    powerlaw.plot_cdf(counts)
    # plt.show()
    powerlaw.plot_pdf(pos_counts)
    # plt.show()

    fitted_dist = powerlaw.Fit(pos_counts, discrete=True)

    # Dump the attributes of the fit.  Only sized values with 100 or more
    # entries are abbreviated; previously every value without ``__len__``
    # (e.g. plain numbers) was reported as "val too long" as well.
    for key, val in fitted_dist.__dict__.items():
        is_long = hasattr(val, "__len__") and len(val) >= 100
        print(key, ":\t", "val too long" if is_long else val)

    print()
    print("\n\n", fitted_dist.find_xmin())
# Notebook export.  ``df``, ``df_primary`` and the bare names ``yscale``,
# ``xscale``, ``mean`` and ``sqrt`` are defined outside this excerpt
# (presumably a pylab-style star import -- confirm).

# Number of distinct subgroup ids per patent, then how many patents share each
# of those numbers, and finally the cumulative share of patents.
r = df_primary.groupby(['patent_id'])['subgroup_id'].nunique()
z = r.value_counts().sort_index()
z.cumsum() / z.sum()


# In[12]:

# Mean number of distinct aggregated subgroup ids per patent (``df``).
df.groupby(['patent_id'])['subgroup_aggregated_id'].nunique().mean()


# In[13]:

# Same statistic computed on ``df_primary``.
df_primary.groupby(['patent_id'])['subgroup_aggregated_id'].nunique().mean()


# In[14]:

import powerlaw
# CDF of the number of rows per group id; the y axis is switched to linear
# and the x axis to log after plotting.
powerlaw.plot_cdf(df_primary['group_id'].value_counts().values)
yscale('linear')
xscale('log')
print(df_primary['group_id'].nunique())


# In[15]:

import powerlaw
# Same plot for the number of rows per aggregated subgroup id.
z = df_primary['subgroup_aggregated_id'].value_counts()
powerlaw.plot_cdf(z.values)
yscale('linear')
xscale('log')
print(len(z))
# Squared fraction of ids whose row count exceeds sqrt(number of ids).
mean(z > sqrt(len(z)))**2


# In[16]:
def plot_features():
    """Save overview CDF and CCDF plots of the vertex features of both graphs.

    Loads the Wikipedia link graph and the transitions graph, then writes
    four PDFs to 'output/': one CDF and one CCDF overview per graph, with one
    curve per vertex property.

    Fixes over the previous version: the '*-ccdf.pdf' files are now drawn with
    ``powerlaw.plot_ccdf`` (they used ``plot_cdf``), the y-axis labels match
    the plotted quantity (they were swapped), and every figure is closed after
    saving instead of being left open.
    """
    # Parenthesised single-argument print works on Python 2 and Python 3.
    print('before load')
    network = load_graph("output/wikipedianetwork.xml.gz")
    print('after load')
    print('before load')
    network_transitions = load_graph("output/transitionsnetwork.xml.gz")
    print('after load')

    colors = {'local_clust': 'r', 'eigenvector_centr': 'b', 'page_rank': 'g',
              'kcore': 'm', 'hub': 'c', 'authority': 'k'}
    labels = {'local_clust': 'clust.', 'eigenvector_centr': 'eigen. centr.',
              'page_rank': 'page rank', 'kcore': 'kcore', 'hub': 'hub',
              'authority': 'authority'}

    base_features = ['local_clust', 'page_rank', 'hub', 'authority', 'kcore']
    # Only the Wikipedia CCDF overview also shows eigenvector centrality.
    all_features = ['local_clust', 'eigenvector_centr', 'page_rank', 'hub',
                    'authority', 'kcore']

    # (graph, features, plot function, y label, y limits, output path)
    figures = [
        (network_transitions, base_features, powerlaw.plot_cdf,
         '$P(X<=f)$', [0, 1],
         'output/wikipedia-transitions-features-cdf.pdf'),
        (network_transitions, base_features, powerlaw.plot_ccdf,
         '$P(X>=f)$', None,
         'output/wikipedia-transitions-features-ccdf.pdf'),
        (network, base_features, powerlaw.plot_cdf,
         '$P(X<=f)$', [0, 1],
         'output/wikipedia-features-cdf.pdf'),
        (network, all_features, powerlaw.plot_ccdf,
         '$P(X>=f)$', None,
         'output/wikipedia-features-ccdf.pdf'),
    ]

    for graph, features, plot_fn, y_label, y_limits, path in figures:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        for f in features:
            feature = graph.vertex_properties[f]
            plot_fn(feature.get_array(), ax, label=labels[f], color=colors[f])
        ax.set_xlabel('Feature $f$')
        ax.set_ylabel(y_label)
        if y_limits is not None:
            ax.set_ylim(y_limits)
        ax.legend(fancybox=True, loc=3, ncol=2, prop={'size': 4})
        fig.tight_layout()
        fig.savefig(path)
        # Release the figure; clearing it would keep it registered in pyplot.
        plt.close(fig)
def _save_degree_hist(network, direction, x_label, path, tight):
    """Save the log-log degree histogram of *network* for one direction."""
    hist = vertex_hist(network, direction)
    fig, ax = plt.subplots()
    ax.set_yscale('log')
    ax.set_xscale('log')
    # The last entry of hist[1] is dropped so it lines up with hist[0].
    ax.plot(hist[1][:-1], hist[0], marker='o')
    ax.set_xlabel(x_label)
    ax.set_ylabel('Frequency')
    ax.set_ylim([1, 10**6])
    if tight:
        fig.tight_layout()
    fig.savefig(path)
    plt.close(fig)


def _save_distribution(values, plot_fn, x_label, y_label, path,
                       y_limits=None, title=None, tight=False):
    """Draw *values* with *plot_fn* on a fresh figure and save it to *path*."""
    fig, ax = plt.subplots()
    plot_fn(values, ax)
    if title:
        ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    if y_limits is not None:
        ax.set_ylim(y_limits)
    if tight:
        fig.tight_layout()
    fig.savefig(path)
    plt.close(fig)


def _save_structure_figures(network, prefix, tight, cdf_ylim=None,
                            ccdf_ylims=None, ccdf_titles=None):
    """Save the degree histograms and per-property CDF/CCDF plots of a graph."""
    ccdf_ylims = ccdf_ylims or {}
    ccdf_titles = ccdf_titles or {}

    degree_plots = (
        ("out", 'Out-degree', 'out-deg-dist'),
        ("in", 'In-degree', 'in-deg-dist'),
        ("total", 'Degree', 'deg-dist'),
    )
    for direction, x_label, stem in degree_plots:
        _save_degree_hist(network, direction, x_label,
                          '%s-%s.pdf' % (prefix, stem), tight)

    # (vertex property, file stem, x label, symbol used in the y label)
    properties = (
        ("local_clust", 'clust', 'Local Clustering Coefficient C', 'C'),
        ("page_rank", 'prank', 'Page rank Pr', 'Pr'),
        ("kcore", 'kcore', 'k-Core kC', 'kC'),
        ("eigenvector_centr", 'eigenvcentr', 'Eigenvector Centrality E', 'E'),
    )
    for key, stem, x_label, symbol in properties:
        values = network.vertex_properties[key].get_array()
        _save_distribution(values, powerlaw.plot_cdf, x_label,
                           'P(x<=%s)' % symbol,
                           '%s-%s-cdf.pdf' % (prefix, stem),
                           y_limits=cdf_ylim, tight=tight)
        _save_distribution(values, powerlaw.plot_ccdf, x_label,
                           'P(x>=%s)' % symbol,
                           '%s-%s-ccdf.pdf' % (prefix, stem),
                           y_limits=ccdf_ylims.get(key),
                           title=ccdf_titles.get(key), tight=tight)


def _save_feature_overviews(network, prefix, ccdf_features):
    """Save one CDF and one CCDF figure overlaying several vertex properties."""
    colors = {'local_clust': 'r', 'eigenvector_centr': 'b', 'page_rank': 'g',
              'kcore': 'm', 'hub': 'c', 'authority': 'k'}
    labels = {'local_clust': 'clust.', 'eigenvector_centr': 'eigen. centr.',
              'page_rank': 'page rank', 'kcore': 'kcore', 'hub': 'hub',
              'authority': 'authority'}
    cdf_features = ['local_clust', 'page_rank', 'hub', 'authority', 'kcore']

    # (features, plot function, y label, y limits, output path)
    figures = (
        (cdf_features, powerlaw.plot_cdf, '$P(X<=f)$', [0, 1],
         '%s-features-cdf.pdf' % prefix),
        (ccdf_features, powerlaw.plot_ccdf, '$P(X>=f)$', None,
         '%s-features-ccdf.pdf' % prefix),
    )
    for features, plot_fn, y_label, y_limits, path in figures:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        for f in features:
            feature = network.vertex_properties[f]
            plot_fn(feature.get_array(), ax, label=labels[f], color=colors[f])
        ax.set_xlabel('Feature $f$')
        ax.set_ylabel(y_label)
        if y_limits is not None:
            ax.set_ylim(y_limits)
        ax.legend(fancybox=True, loc=3, ncol=2, prop={'size': 4})
        fig.tight_layout()
        fig.savefig(path)
        plt.close(fig)


def plot_stats():
    """Save structural statistics plots for both Wikipedia graphs.

    For the link graph and the transitions graph this writes, under
    'output/': log-log out/in/total degree histograms, a CDF and a CCDF for
    the local clustering, page rank, k-core and eigenvector centrality vertex
    properties, and a CDF/CCDF overview overlaying several properties.

    Fixes over the previous version:
      * the overview '*-features-ccdf.pdf' files are drawn with
        ``powerlaw.plot_ccdf`` (they used ``plot_cdf``) and the overview
        y-axis labels match the plotted quantity (they were swapped);
      * the x label 'Local Clustering Coefficient $C' (unbalanced '$') and the
        misspelt 'Eingenvector' label are corrected;
      * every figure is closed after saving instead of only being cleared,
        which left one open figure per plot;
      * ``print`` is called in a form valid on Python 2 and Python 3.
    """
    # wikipedia graph structural statistics
    print('before load')
    network = load_graph("output/wikipedianetwork.xml.gz")
    print('after load')
    _save_structure_figures(
        network, 'output/wikipedia', tight=True, cdf_ylim=[0, 1],
        ccdf_ylims={'local_clust': [10**-4, 10**-0.5]})
    _save_feature_overviews(
        network, 'output/wikipedia',
        ccdf_features=['local_clust', 'eigenvector_centr', 'page_rank', 'hub',
                       'authority', 'kcore'])

    # wikipedia transitions graph structural statistics
    print('before load')
    network_transitions = load_graph("output/transitionsnetwork.xml.gz")
    print('after load')
    # As before, these figures use no tight_layout and no fixed CDF y range,
    # and only the clustering CCDF carries a title.
    _save_structure_figures(
        network_transitions, 'output/wikipedia-transitions', tight=False,
        ccdf_titles={'local_clust': 'Clustering Coefficient Distribution'})

    # The 'hub' and 'authority' properties are read from the loaded graph;
    # nothing is computed between these two messages.
    print('before hits')
    print('after hits')
    _save_feature_overviews(
        network_transitions, 'output/wikipedia-transitions',
        ccdf_features=['local_clust', 'page_rank', 'hub', 'authority',
                       'kcore'])
def plot_features():
    """Save overview CDF and CCDF plots of the vertex features of both graphs.

    Four PDFs are written to 'output/': a CDF and a CCDF overview for the
    transitions graph and for the Wikipedia link graph, each overlaying one
    curve per vertex property.

    Fixes over the previous version: the '*-ccdf.pdf' files are drawn with
    ``powerlaw.plot_ccdf`` (they used ``plot_cdf``), the y-axis labels match
    the plotted quantity (they were swapped), figures are closed after saving,
    and ``print`` is called in a form valid on Python 2 and Python 3.
    """
    colors = {
        'local_clust': 'r',
        'eigenvector_centr': 'b',
        'page_rank': 'g',
        'kcore': 'm',
        'hub': 'c',
        'authority': 'k',
    }
    labels = {
        'local_clust': 'clust.',
        'eigenvector_centr': 'eigen. centr.',
        'page_rank': 'page rank',
        'kcore': 'kcore',
        'hub': 'hub',
        'authority': 'authority',
    }

    def render(graph, features, plot_fn, y_label, path, y_limits=None):
        # Overlay the listed vertex properties of ``graph`` on one axes.
        fig = plt.figure()
        ax = fig.add_subplot(111)
        for f in features:
            feature = graph.vertex_properties[f]
            plot_fn(feature.get_array(), ax, label=labels[f], color=colors[f])
        ax.set_xlabel('Feature $f$')
        ax.set_ylabel(y_label)
        if y_limits is not None:
            ax.set_ylim(y_limits)
        ax.legend(fancybox=True, loc=3, ncol=2, prop={'size': 4})
        fig.tight_layout()
        fig.savefig(path)
        # Close rather than clear so the figure does not stay registered.
        plt.close(fig)

    print('before load')
    network = load_graph("output/wikipedianetwork.xml.gz")
    print('after load')
    print('before load')
    network_transitions = load_graph("output/transitionsnetwork.xml.gz")
    print('after load')

    base_features = ['local_clust', 'page_rank', 'hub', 'authority', 'kcore']

    render(network_transitions, base_features, powerlaw.plot_cdf,
           '$P(X<=f)$', 'output/wikipedia-transitions-features-cdf.pdf',
           y_limits=[0, 1])
    render(network_transitions, base_features, powerlaw.plot_ccdf,
           '$P(X>=f)$', 'output/wikipedia-transitions-features-ccdf.pdf')
    render(network, base_features, powerlaw.plot_cdf,
           '$P(X<=f)$', 'output/wikipedia-features-cdf.pdf',
           y_limits=[0, 1])
    # Only this overview also shows eigenvector centrality.
    render(network,
           ['local_clust', 'eigenvector_centr', 'page_rank', 'hub',
            'authority', 'kcore'],
           powerlaw.plot_ccdf, '$P(X>=f)$',
           'output/wikipedia-features-ccdf.pdf')
def plot_stats(): # wikipedia graph structural statistics print 'before load' network = load_graph("output/wikipedianetwork.xml.gz") print 'after load' out_hist = vertex_hist(network, "out") plt.gca().set_yscale('log') plt.gca().set_xscale('log') plt.plot(out_hist[1][:-1], out_hist[0], marker='o') plt.xlabel('Out-degree') plt.ylabel('Frequency') plt.gca().set_ylim([1, 10**6]) #plt.title('Out-degree Distribution') plt.tight_layout() plt.savefig('output/wikipedia-out-deg-dist.pdf') plt.clf() in_hist = vertex_hist(network, "in") plt.gca().set_yscale('log') plt.gca().set_xscale('log') plt.plot(in_hist[1][:-1], in_hist[0], marker='o') plt.xlabel('In-degree') plt.ylabel('Frequency') plt.gca().set_ylim([1, 10**6]) #plt.title('In-degree Distribution') plt.tight_layout() plt.savefig('output/wikipedia-in-deg-dist.pdf') plt.clf() total_hist = vertex_hist(network, "total") plt.gca().set_yscale('log') plt.gca().set_xscale('log') plt.plot(total_hist[1][:-1], total_hist[0], marker='o') plt.xlabel('Degree') plt.ylabel('Frequency') plt.gca().set_ylim([1, 10**6]) #plt.title('Degree Distribution') plt.tight_layout() plt.savefig('output/wikipedia-deg-dist.pdf') plt.clf() clust = network.vertex_properties["local_clust"] #clust = local_clustering(network, undirected=False) #hist, bin_edges = np.histogram(clust.get_array(), 100, density=True) #cdf = np.cumsum(hist) #plt.plot(bin_edges[1:], cdf, marker='o') #plt.xlabel('Local Clustering Coefficient C') #plt.ylabel('P(x<=C)') #plt.title('Clustering Coefficient Distribution') #plt.savefig('output/wikipedia-clust-cdf.pdf') fig, ax = plt.subplots() powerlaw.plot_cdf(clust.get_array(), ax) #ax.set_title('Clustering Coefficient Distribution') ax.set_xlabel('Local Clustering Coefficient $C') ax.set_ylabel('P(x<=C)') ax.set_ylim([0, 1]) fig.tight_layout() fig.savefig('output/wikipedia-clust-cdf.pdf') plt.clf() fig, ax = plt.subplots() powerlaw.plot_ccdf(clust.get_array(), ax) #ax.set_title('Clustering Coefficient Distribution') 
ax.set_xlabel('Local Clustering Coefficient C') ax.set_ylabel('P(x>=C)') ax.set_ylim([10**-4, 10**-0.5]) fig.tight_layout() fig.savefig('output/wikipedia-clust-ccdf.pdf') plt.clf() prank = network.vertex_properties["page_rank"] #hist, bin_edges = np.histogram(prank.get_array(), 100, density=True) #cdf = np.cumsum(hist) #plt.plot(bin_edges[1:], cdf, marker='o') #plt.xlabel('Page rank Pr') #plt.ylabel('P(x<=Pr)') #plt.title('Page rank Distribution') #plt.savefig('output/wikipedia-prank-cdf.pdf') fig, ax = plt.subplots() powerlaw.plot_cdf(prank.get_array(), ax) #ax.set_title('Page Rank Distribution') ax.set_xlabel('Page rank Pr') ax.set_ylabel('P(x<=Pr)') ax.set_ylim([0, 1]) fig.tight_layout() fig.savefig('output/wikipedia-prank-cdf.pdf') plt.clf() fig, ax = plt.subplots() powerlaw.plot_ccdf(prank.get_array(), ax) #ax.set_title('Page Rank Distribution') ax.set_xlabel('Page rank Pr') ax.set_ylabel('P(x>=Pr)') fig.tight_layout() fig.savefig('output/wikipedia-prank-ccdf.pdf') plt.clf() kcore = network.vertex_properties["kcore"] #hist, bin_edges = np.histogram(kcore.get_array(), 100, density=True) #cdf = np.cumsum(hist) #plt.plot(bin_edges[1:], cdf, marker='o') #plt.xlabel('Kcore kC') #plt.ylabel('P(x<=kC)') #plt.title('K-Core Distribution') #plt.savefig('output/wikipedia-kcore-cdf.pdf') fig, ax = plt.subplots() powerlaw.plot_cdf(kcore.get_array(), ax) #ax.set_title('K-Core Distribution') ax.set_xlabel('k-Core kC') ax.set_ylabel('P(x<=kC)') ax.set_ylim([0, 1]) fig.tight_layout() fig.savefig('output/wikipedia-kcore-cdf.pdf') plt.clf() fig, ax = plt.subplots() powerlaw.plot_ccdf(kcore.get_array(), ax) #ax.set_title('K-Core Distribution') ax.set_xlabel('k-Core kC') ax.set_ylabel('P(x>=kC)') fig.tight_layout() fig.savefig('output/wikipedia-kcore-ccdf.pdf') plt.clf() eigenvector_centr = network.vertex_properties["eigenvector_centr"] #hist, bin_edges = np.histogram(eigenvector_centr.get_array(), 100, density=True) #cdf = np.cumsum(hist) #plt.plot(bin_edges[1:], cdf, marker='o') 
#plt.xlabel('Eigenvector Centrality E') #plt.ylabel('P(x<=E)') #plt.title('Eigenvector Centrality Distribution') #plt.savefig('output/wikipedia-eigenvcentr-cdf.pdf') fig, ax = plt.subplots() powerlaw.plot_cdf(eigenvector_centr.get_array(), ax) #ax.set_title('Eigenvector Centrality E') ax.set_xlabel('Eigenvector Centrality E') ax.set_ylabel('P(x<=E)') ax.set_ylim([0, 1]) fig.tight_layout() fig.savefig('output/wikipedia-eigenvcentr-cdf.pdf') plt.clf() fig, ax = plt.subplots() powerlaw.plot_ccdf(eigenvector_centr.get_array(), ax) #ax.set_title('Eigenvector Centrality E') ax.set_xlabel('Eigenvector Centrality E') ax.set_ylabel('P(x>=E)') fig.tight_layout() fig.savefig('output/wikipedia-eigenvcentr-ccdf.pdf') plt.clf() colors = { 'local_clust': 'r', 'eigenvector_centr': 'b', 'page_rank': 'g', 'kcore': 'm', 'hub': 'c', 'authority': 'k' } labels = { 'local_clust': 'clust.', 'eigenvector_centr': 'eigen. centr.', 'page_rank': 'page rank', 'kcore': 'kcore', 'hub': 'hub', 'authority': 'authority' } fig = plt.figure() ax = fig.add_subplot(111) for f in ['local_clust', 'page_rank', 'hub', 'authority', 'kcore']: feature = network.vertex_properties[f] powerlaw.plot_cdf(feature.get_array(), ax, label=labels[f], color=colors[f]) ax.set_xlabel('Feature $f$') ax.set_ylabel('$P(X>=f)$') ax.set_ylim([0, 1]) plt.legend(fancybox=True, loc=3, ncol=2, prop={'size': 4}) plt.tight_layout() plt.savefig('output/wikipedia-features-cdf.pdf') plt.clf() colors = { 'local_clust': 'r', 'eigenvector_centr': 'b', 'page_rank': 'g', 'kcore': 'm', 'hub': 'c', 'authority': 'k' } labels = { 'local_clust': 'clust.', 'eigenvector_centr': 'eigen. 
centr.', 'page_rank': 'page rank', 'kcore': 'kcore', 'hub': 'hub', 'authority': 'authority' } fig = plt.figure() ax = fig.add_subplot(111) for f in [ 'local_clust', 'eigenvector_centr', 'page_rank', 'hub', 'authority', 'kcore' ]: feature = network.vertex_properties[f] powerlaw.plot_cdf(feature.get_array(), ax, label=labels[f], color=colors[f]) ax.set_xlabel('Feature $f$') ax.set_ylabel('$P(X<=f)$') plt.legend(fancybox=True, loc=3, ncol=2, prop={'size': 4}) plt.tight_layout() plt.savefig('output/wikipedia-features-ccdf.pdf') plt.clf() # wikipedia transitions graph structural statistics print 'before load' network_transitions = load_graph("output/transitionsnetwork.xml.gz") print 'after load' out_hist = vertex_hist(network_transitions, "out") plt.gca().set_yscale('log') plt.gca().set_xscale('log') plt.plot(out_hist[1][:-1], out_hist[0], marker='o') plt.xlabel('Out-degree') plt.ylabel('Frequency') plt.gca().set_ylim([1, 10**6]) #plt.title('Out-degree Distribution') plt.savefig('output/wikipedia-transitions-out-deg-dist.pdf') plt.clf() in_hist = vertex_hist(network_transitions, "in") plt.gca().set_yscale('log') plt.gca().set_xscale('log') plt.plot(in_hist[1][:-1], in_hist[0], marker='o') plt.xlabel('In-degree') plt.ylabel('Frequency') #plt.title('In-degree Distribution') plt.gca().set_ylim([1, 10**6]) plt.savefig('output/wikipedia-transitions-in-deg-dist.pdf') plt.clf() total_hist = vertex_hist(network_transitions, "total") plt.gca().set_yscale('log') plt.gca().set_xscale('log') plt.plot(total_hist[1][:-1], total_hist[0], marker='o') plt.xlabel('Degree') plt.ylabel('Frequency') #plt.title('Degree Distribution') plt.gca().set_ylim([1, 10**6]) plt.savefig('output/wikipedia-transitions-deg-dist.pdf') plt.clf() #clust = local_clustering(network_transitions, undirected=False) clust = network_transitions.vertex_properties["local_clust"] #hist, bin_edges = np.histogram(clust.get_array(), 100, density=True) #cdf = np.cumsum(hist) #plt.plot(bin_edges[1:], cdf, marker='o') 
#plt.xlabel('Local Clustering Coefficient C') #plt.ylabel('P(x<=C)') #plt.title('Clustering Coefficient Distribution') #plt.savefig('output/wikipedia-transitions-clust-cdf.pdf') fig, ax = plt.subplots() powerlaw.plot_cdf(clust.get_array(), ax) #ax.set_title('Clustering Coefficient Distribution') ax.set_xlabel('Local Clustering Coefficient C') ax.set_ylabel('P(x<=C)') fig.savefig('output/wikipedia-transitions-clust-cdf.pdf') plt.clf() fig, ax = plt.subplots() powerlaw.plot_ccdf(clust.get_array(), ax) ax.set_title('Clustering Coefficient Distribution') ax.set_xlabel('Local Clustering Coefficient C') ax.set_ylabel('P(x>=C)') fig.savefig('output/wikipedia-transitions-clust-ccdf.pdf') plt.clf() prank = network_transitions.vertex_properties["page_rank"] #hist, bin_edges = np.histogram(prank.get_array(), 100, density=True) #cdf = np.cumsum(hist) #plt.plot(bin_edges[1:], cdf, marker='o') #plt.xlabel('Page rank Pr') #plt.ylabel('P(x<=Pr)') #plt.title('Page rank Distribution') #plt.savefig('output/wikipedia-transitions-prank-cdf.pdf') fig, ax = plt.subplots() powerlaw.plot_cdf(prank.get_array(), ax) #ax.set_title('Page Rank Distribution') ax.set_xlabel('Page rank Pr') ax.set_ylabel('P(x<=Pr)') fig.savefig('output/wikipedia-transitions-prank-cdf.pdf') plt.clf() fig, ax = plt.subplots() powerlaw.plot_ccdf(prank.get_array(), ax) #ax.set_title('Page Rank Distribution') ax.set_xlabel('Page rank Pr') ax.set_ylabel('P(x>=Pr)') fig.savefig('output/wikipedia-transitions-prank-ccdf.pdf') plt.clf() kcore = network_transitions.vertex_properties["kcore"] #hist, bin_edges = np.histogram(kcore.get_array(), 100, density=True) #cdf = np.cumsum(hist) #plt.plot(bin_edges[1:], cdf, marker='o') #plt.xlabel('Kcore kC') #plt.ylabel('P(x<=kC)') #plt.title('K-Core Distribution') #plt.savefig('output/wikipedia-transitions-kcore-cdf.pdf') fig, ax = plt.subplots() powerlaw.plot_cdf(kcore.get_array(), ax) #ax.set_title('K-Core Distribution') ax.set_xlabel('k-Core kC') ax.set_ylabel('P(x<=kC)') 
fig.savefig('output/wikipedia-transitions-kcore-cdf.pdf') plt.clf() fig, ax = plt.subplots() powerlaw.plot_ccdf(kcore.get_array(), ax) #ax.set_title('K-Core Distribution') ax.set_xlabel('k-Core kC') ax.set_ylabel('P(x>=kC)') fig.savefig('output/wikipedia-transitions-kcore-ccdf.pdf') plt.clf() eigenvector_centr = network_transitions.vertex_properties[ "eigenvector_centr"] #hist, bin_edges = np.histogram(eigenvector_centr.get_array(), 100, density=True) #cdf = np.cumsum(hist) #plt.plot(bin_edges[1:], cdf, marker='o') #plt.xlabel('Eingenvector centrality E') #plt.ylabel('P(x<=E)') #plt.title('Eigenvector Centrality Distribution') #plt.savefig('output/wikipedia-transitions-eigenvcentr-cdf.pdf') fig, ax = plt.subplots() powerlaw.plot_cdf(eigenvector_centr.get_array(), ax) #ax.set_title('Eigenvector Centrality Distribution') ax.set_xlabel('Eingenvector centrality E') ax.set_ylabel('P(x<=E)') fig.savefig('output/wikipedia-transitions-eigenvcentr-cdf.pdf') plt.clf() fig, ax = plt.subplots() powerlaw.plot_ccdf(eigenvector_centr.get_array(), ax) #ax.set_title('Eigenvector Centrality Distribution') ax.set_xlabel('Eingenvector centrality E') ax.set_ylabel('P(x>=E)') fig.savefig('output/wikipedia-transitions-eigenvcentr-ccdf.pdf') plt.clf() print 'before hits' #ee, authority, hub = hits(network_transitions) #network_transitions.vertex_properties["authority"] = authority #network_transitions.vertex_properties["hub"] = hub #network_transitions.save("output/transitionsnetwork.xml.gz") print 'after hits' colors = { 'local_clust': 'r', 'eigenvector_centr': 'b', 'page_rank': 'g', 'kcore': 'm', 'hub': 'c', 'authority': 'k' } labels = { 'local_clust': 'clust.', 'eigenvector_centr': 'eigen. 
centr.', 'page_rank': 'page rank', 'kcore': 'kcore', 'hub': 'hub', 'authority': 'authority' } fig = plt.figure() ax = fig.add_subplot(111) for f in ['local_clust', 'page_rank', 'hub', 'authority', 'kcore']: feature = network_transitions.vertex_properties[f] powerlaw.plot_cdf(feature.get_array(), ax, label=labels[f], color=colors[f]) ax.set_xlabel('Feature $f$') ax.set_ylabel('$P(X>=f)$') ax.set_ylim([0, 1]) plt.legend(fancybox=True, loc=3, ncol=2, prop={'size': 4}) plt.tight_layout() plt.savefig('output/wikipedia-transitions-features-cdf.pdf') plt.clf() colors = { 'local_clust': 'r', 'eigenvector_centr': 'b', 'page_rank': 'g', 'kcore': 'm', 'hub': 'c', 'authority': 'k' } labels = { 'local_clust': 'clust.', 'eigenvector_centr': 'eigen. centr.', 'page_rank': 'page rank', 'kcore': 'kcore', 'hub': 'hub', 'authority': 'authority' } fig = plt.figure() ax = fig.add_subplot(111) for f in ['local_clust', 'page_rank', 'hub', 'authority', 'kcore']: feature = network_transitions.vertex_properties[f] powerlaw.plot_cdf(feature.get_array(), ax, label=labels[f], color=colors[f]) ax.set_xlabel('Feature $f$') ax.set_ylabel('$P(X<=f)$') plt.legend(fancybox=True, loc=3, ncol=2, prop={'size': 4}) plt.tight_layout() plt.savefig('output/wikipedia-transitions-features-ccdf.pdf') plt.clf()
# Plot the signal variability of one slice of the recording (index 33 on the
# first axis of d_all -- presumably a single channel; confirm against the
# data layout).
avalanchetoolbox.avalanches.signal_variability(d_all[33, :],)
pyplot.xlabel('Signal range (Standard deviation)')
pyplot.ylabel('Range probability (log(p))')
plots.savefig()
pyplot.close('all')

print('Analyzing all channels')
# The value returned here is reused below as the x position of a vertical
# marker on the avalanche-size plot.
active_sensors = avalanchetoolbox.avalanches.signal_variability(d_all, (8, 8))
plots.savefig()
pyplot.close('all')

print('Running avalanche analyses')
# Pool the avalanche sizes found in each of the 80 slices along the third
# axis of mat[data_key].
avs = array([])
for i in range(80):
    d = mat[data_key][:64, :, i]
    m = avalanchetoolbox.avalanches.run_analysis(
        d,
        time_scale='mean_iei',
        threshold_mode='Likelihood',
        threshold_level=10
    )
    # Results without a 'size_events' entry contribute nothing to the pool.
    if 'size_events' in m:
        # avs is one-dimensional, so the arrays must be joined along the
        # default axis 0; requesting axis 1 is out of bounds for 1-D input
        # and is rejected by current NumPy releases.
        avs = concatenate((avs, m['size_events']))

pyplot.figure()
# The title and y-label below describe a survival function, so request it
# explicitly instead of depending on the library's default for `survival`.
powerlaw.plot_cdf(avs, survival=True)
pyplot.xlim(1, 100)
# Vertical line at x = active_sensors, spanning the current y-limits.
pyplot.plot((active_sensors, active_sensors), pyplot.ylim())
pyplot.title('Neuronal avalanche size distribution, survival function')
pyplot.xlabel('Avalanche Size (number of events)')
pyplot.ylabel('P(Size>x)')
plots.savefig()
pyplot.close('all')

plots.close()
# Uses the powerlaw package: https://github.com/jeffalstott/powerlaw
import networkx as nx
import matplotlib.pyplot as plt
import powerlaw as pl

# Load the graph from a weighted edge list.
# NOTE(review): the ".mtx" extension suggests a Matrix Market file, whose
# header and comment lines start with "%". read_weighted_edgelist only skips
# lines starting with "#" by default -- confirm the file has been converted
# to a plain edge list before relying on this call.
G = nx.read_weighted_edgelist('as-22july06.mtx')

# Degree of every node, in ascending order.
sorted_degree = sorted(deg for _, deg in G.degree())

# Plot the empirical cumulative distribution function of the node degrees.
pl.plot_cdf(sorted_degree)
plt.show()