def draw_cdf_JI_helper(ases_in_use, mode, color, marker): #mode is for title #plt.title(title) colors = ['b'] lis_legend = [] lis = [] total_dots = 0 #names = ("BGP","iBGP","eBGP") for count,as_ in enumerate(ases_in_use): print count,len(as_) #c2 = 0 for lg1,lg2 in itertools.combinations(as_.get_attr(LGS),2): total_dots += 1 #print c2 #c2 += 1 lg1.update_prefix_as_path() p1 = lg1.get_prefixes() lg2.update_prefix_as_path() p2 = lg2.get_prefixes() lis.append(JI(p1,p2)) if JI(p1,p2) == 0: print lg1,lg2 lg1.release() lg2.release() print "total dots",total_dots#658 dist,prob = pl.cdf(lis) #plt.scatter(dist,prob,c='b',marker='x') return plt.scatter(dist,prob,c=color,marker=marker) '''
def plot_aggregated_counts_distributions_ccdf():
    #agg_distributions = read_pickle(HOME+'output/aggregated_counts_distribution.obj')
    #for i in agg_distributions.values():
    #    print len(i)
    colors = {'source_article': 'r', 'target_article': 'b'}
    labels = {
        'source_article': 'source article',
        'target_article': 'target article'
    }
    fig = plt.figure()
    ax = fig.add_subplot(111)
    #for category in ['source_article', 'target_article']:
    #    data = agg_distributions[category]
    #    data = [int(x[0]) for x in data]
    #    powerlaw.plot_ccdf(data, ax, label=labels[category],color=colors[category])
    category_distributions = read_pickle(
        HOME + 'output/category_counts_distribution.obj')
    data = category_distributions['counts']
    data = [int(x[0]) for x in data]
    #to consider the edges that have zero transitions we substract the number transitions from the number of edges in wikipeida
    number_of_edges = 339463340
    listofzeros = [0] * (number_of_edges - len(data))
    print len(data)
    print len(listofzeros)
    zeros = np.zeros((number_of_edges - len(data)))
    data = np.append(zeros, data)
    #data = data.extend(listofzeros)
    print data
    #hist, bin_edges = np.histogram(data, bins=100, normed=True)
    #ones = np.ones(100)
    #ccdf = ones - np.cumsum(data)
    #cdf = np.cumsum(hist)
    #print cdf
    #print ccdf
    bins, CDF = powerlaw.cdf(data, survival=True)
    plt.plot(bins, CDF)
    plt.xscale('symlog')
    #powerlaw.plot_cdf(data, ax, label='transitions', color='r')
    # further plotting
    #ax.set_xlabel(r'Number of transitions $n$')
    #ax.set_ylabel(r'$P(X \geq n)$')
    plt.legend(fancybox=True, loc='lower left', ncol=1, prop={'size': 5})
    #leg = plt.gca().get_legend()
    #ltext = leg.get_texts()  # all the text.Text instance in the legend
    #plt.setp(ltext, fontsize='small')  # the legend text fontsize
    plt.tight_layout()
    plt.savefig('output/agg_counts_distributions.pdf', bbox_inches='tight')
'''
def plot_aggregated_counts_distributions_ccdf(): #agg_distributions = read_pickle(HOME+'output/aggregated_counts_distribution.obj') #for i in agg_distributions.values(): # print len(i) colors= {'source_article':'r','target_article':'b'} labels = {'source_article': 'source article', 'target_article':'target article'} fig = plt.figure() ax = fig.add_subplot(111) #for category in ['source_article', 'target_article']: # data = agg_distributions[category] # data = [int(x[0]) for x in data] # powerlaw.plot_ccdf(data, ax, label=labels[category],color=colors[category]) category_distributions = read_pickle(HOME+'output/category_counts_distribution.obj') data = category_distributions['counts'] data = [int(x[0]) for x in data] #to consider the edges that have zero transitions we substract the number transitions from the number of edges in wikipeida number_of_edges = 339463340 listofzeros = [0] * (number_of_edges - len(data)) print len(data) print len(listofzeros) zeros = np.zeros((number_of_edges - len(data))) data = np.append(zeros, data) #data = data.extend(listofzeros) print data #hist, bin_edges = np.histogram(data, bins=100, normed=True) #ones = np.ones(100) #ccdf = ones - np.cumsum(data) #cdf = np.cumsum(hist) #print cdf #print ccdf bins, CDF = powerlaw.cdf(data, survival=True) plt.plot(bins, CDF) plt.xscale('symlog') #powerlaw.plot_cdf(data, ax, label='transitions', color='r') # further plotting #ax.set_xlabel(r'Number of transitions $n$') #ax.set_ylabel(r'$P(X \geq n)$') plt.legend(fancybox=True, loc='lower left', ncol=1, prop={'size':5}) #leg = plt.gca().get_legend() #ltext = leg.get_texts() # all the text.Text instance in the legend #plt.setp(ltext, fontsize='small') # the legend text fontsize plt.tight_layout() plt.savefig('output/agg_counts_distributions.pdf', bbox_inches='tight')
def draw_avg_geo_as_dist_helper(lg_pairs, d, color, mode): values = [] for lg1,lg2 in lg_pairs: lg1.update_prefix_as_path() p1 = lg1.get_prefixes() lg2.update_prefix_as_path() p2 = lg2.get_prefixes() overlap = p1 & p2 if len(overlap) == 0:#no overlapping prefixes continue if mode == GEODIST: for p in overlap: coord_p = d[p] coord_lg1 = getattr(lg1,LAT_LON) coord_lg2 = getattr(lg2,LAT_LON) if coord_lg1 == (0.0, 0.0) or coord_lg2 == (0.0, 0.0) or coord_p == (0.0 , 0.0):#if one of the location is unknown continue dist1 = geopy.distance.great_circle(coord_lg1,coord_p).km dist2 = geopy.distance.great_circle(coord_lg2,coord_p).km avg_dist = (dist1+dist2) / 2.0 #print lg1,lg2,p,avg_dist values.append(avg_dist) elif mode == ASDIST: for p in overlap: s1 = lg1.get_as_paths_set(p) s2 = lg2.get_as_paths_set(p) len1 = len(s1) len2 = len(s2) avg_asdist = (len1+len2) / 2.0 #print lg1,lg2,p,avg_asdist values.append(avg_asdist) else: print "wrong!" return lg1.release() lg2.release() dist,prob = pl.cdf(values) return plt.scatter(dist,prob,c=color,marker='x')
def draw_cdf(ases, title, lower=-1, upper=100):
    '''
    args:
        ases: list of AS objects
        title: title of the figure (currently unused; title drawing disabled)
        lower: lower bound of num of LGs in the ASes to be selected
        upper: upper bound of num of LGs in the ASes to be selected
    '''
    # Keep only the ASes whose LG count falls within [lower, upper].
    ases_in_use = [as_ for as_ in ases if lower <= len(as_) <= upper]
    #plt.title(title)
    plt.xlabel("number of neighbors")
    plt.ylabel("cdf")
    names = ("BGP","iBGP","eBGP")
    colors = ['r','b','g']
    # Pool every looking glass of the selected ASes.
    lgs = []
    for as_ in ases_in_use:
        lgs.extend(as_.get_attr(LGS))
    # One CDF scatter per neighbor category (BGP / iBGP / eBGP).
    lis_legend = []
    for idx, col in enumerate(colors):
        counts = [float(len(lg.get_attr(NEI_SET)[idx])) for lg in lgs]
        dist, prob = pl.cdf(counts)
        lis_legend.append(plt.scatter(dist, prob, c=col, marker='x'))
    plt.legend(tuple(lis_legend), names, scatterpoints=1, loc="lower right")
    plt.autoscale()
    plt.margins(0.03)
    #plt.title("Degree distribution based on BGP, eBGP, and iBGP neighbors")
    #plt.xlim(0,15000)
    #plt.ylim(0,1)
    plt.show()
def analyze_pk(g, figfile):
    """Fit a discrete power law to the degree distribution of graph g.

    Plots the fitted PDF together with the empirical PDF (renormalized to
    the x >= xmin tail the fit was made on), saves the figure to figfile,
    and returns the fitted (alpha, xmin).
    """
    degrees = [pair[1] for pair in g.degree()]
    fit = powerlaw.Fit(degrees, discrete=True)
    ax = fit.power_law.plot_pdf(label=r"fit($\alpha$: %.2f, $x_{min}$: %d)" % (fit.alpha, fit.xmin))
    ax.set_ylabel("P(k)")
    ax.set_xlabel("k")
    # Empirical CDF value at xmin, used to renormalize the data PDF so it
    # is comparable to the tail-only power-law fit.
    xs, ys = powerlaw.cdf(degrees)
    cdf = dict(zip(xs, ys))
    cdf_xmin = cdf[fit.xmin]
    bin_edges, prob = powerlaw.pdf(degrees)
    # Midpoint of each histogram bin for plotting the empirical PDF.
    x = [(lo + hi) / 2.0 for lo, hi in zip(bin_edges[:-1], bin_edges[1:])]
    plt.plot(x, prob/(1.0-cdf_xmin), label="data")
    plt.legend()
    plt.savefig(figfile)
    return (fit.power_law.alpha, fit.power_law.xmin)
#Used powerlaw package: https://github.com/jeffalstott/powerlaw
import networkx as nx
import matplotlib.pyplot as plt
import powerlaw as pl

# The graph is read as a weighted edgelist.
# Fix: the original first constructed an empty nx.Graph() and immediately
# overwrote it with read_weighted_edgelist's return value; the dead
# construction is dropped.
G = nx.read_weighted_edgelist('as-22july06.mtx')

# Sorted degree sequence: one degree per node of the graph.
sorted_degree = sorted([deg for node, deg in G.degree()])

# Fix: the original also called pl.cdf(data=sorted_degree, survival=False)
# and discarded its (bins, CDF) return value; powerlaw.cdf only computes
# and returns arrays (no plotting side effects), so the dead call is removed.
# plot_cdf computes and draws the CDF in one step.
pl.plot_cdf(sorted_degree)
plt.show()