class GraphMes:
    """Build a relation graph from (head, relation, tail) samples and report on it.

    The instance holds a directed multigraph ``G`` whose edges carry the
    relation in their ``'attr'`` attribute, an undirected simple view ``uG``,
    the list of samples, and node/relation <-> integer id mappings. Plots and
    the text report are written below the ``logging`` directory.

    NOTE(review): this file defines a second class with the same name further
    down; at import time the later definition replaces this one.
    """

    def __init__(self, logging, graph=None, file=None, start=0, ints=False):
        """Load the graph either from ``file`` or from an existing ``graph``.

        Args:
            logging: Directory that receives plots and ``info.txt``; created
                (including missing parents) when it does not exist.
            graph: A networkx graph whose edges carry an ``'attr'`` attribute.
            file: Path handed to ``DataHelper`` to read the samples.
            start: First integer id used by the node and relation mappings.
            ints: When true, every sample field is converted with ``int``.

        Raises:
            ValueError: Unless exactly one of ``graph`` and ``file`` is given.
        """
        self.logging = logging
        # makedirs also copes with nested log directories and with the
        # directory appearing between the check and the creation.
        os.makedirs(logging, exist_ok=True)

        if graph is None and file is not None:
            self.helper = DataHelper(file, NP=False)
            self.samples = self.helper.GetSamples()
            if ints:
                self.samples = np.array(
                    [[int(s[0]), int(s[1]), int(s[2])] for s in self.samples]
                )
            self.G = self.readGraph(file, ints=ints)
            self.uG = self.readGraph(file, ints=ints, unweight=True)
        elif graph is not None and file is None:
            self.G = graph
            self.uG = nx.Graph(self.G)
            # edges(data=True) yields each (parallel) edge exactly once.
            # Iterating G.edges() and then every key of G[u][v] would emit
            # k*k samples for a node pair joined by k parallel edges.
            self.samples = [
                [head, data['attr'], tail]
                for head, tail, data in self.G.edges(data=True)
            ]
        else:
            raise ValueError("exactly one of `graph` and `file` must be given")

        self.start = start
        self.node2id, self.id2node = self._node2id()
        self.edge2id, self.id2edge = self._edge2id()

    def readGraph(self, sf, ints=False, unweight=False):
        """Build a graph from ``self.samples``.

        ``sf`` and ``ints`` are accepted for interface compatibility but are
        not used here. Returns an undirected ``nx.Graph`` without attributes
        when ``unweight`` is true, otherwise an ``nx.MultiDiGraph`` whose
        edges store the relation under ``'attr'``.
        """
        self.SamplesCnt = len(self.samples)
        if unweight:
            G = nx.Graph()
            for sample in self.samples:
                G.add_edge(sample[0], sample[2])
        else:
            G = nx.MultiDiGraph()
            for sample in self.samples:
                G.add_edge(sample[0], sample[2], attr=sample[1])
        return G

    def graph2id(self, of):
        """Write every sample to ``of`` as ``<head id> <relation id> <tail id>``."""
        with open(of, 'w') as f:
            for h, r, t in self.samples:
                f.write(
                    str(self.node2id[h]) + ' '
                    + str(self.edge2id[r]) + ' '
                    + str(self.node2id[t]) + '\n'
                )

    def _node2id(self):
        """Return ``(node -> id, id -> node)`` with ids counted from ``self.start``."""
        node2id = {}
        id2node = {}
        for offset, node in enumerate(self.G.nodes()):
            node2id[node] = self.start + offset
            id2node[self.start + offset] = node
        return node2id, id2node

    def _edge2id(self):
        """Return ``(relation -> id, id -> relation)`` and fill ``self.attrs``.

        Ids are assigned in first-seen edge order rather than set iteration
        order, so the mapping does not depend on string hash randomisation.
        """
        self.attrs = set()
        ordered = []
        for _, _, data in self.G.edges(data=True):
            attr = data['attr']
            if attr not in self.attrs:
                self.attrs.add(attr)
                ordered.append(attr)

        edge2id = {}
        id2edge = {}
        for offset, attr in enumerate(ordered):
            edge2id[attr] = self.start + offset
            id2edge[self.start + offset] = attr
        return edge2id, id2edge

    def id2file(self, nodefn, edgefn):
        """Dump the id mappings as ``<name> <id>`` lines, ordered by id.

        Iterates over the ids that actually exist, so a non-zero ``start``
        works, and stringifies the names so integer nodes/relations work too.
        """
        with open(nodefn, 'w') as nf:
            for idx in sorted(self.id2node):
                nf.write(str(self.id2node[idx]) + ' ' + str(idx) + '\n')
        with open(edgefn, 'w') as ef:
            for idx in sorted(self.id2edge):
                ef.write(str(self.id2edge[idx]) + ' ' + str(idx) + '\n')

    def zipf(self, plot=True):
        """Fit ``powerLaw`` to the degree distribution of ``G``.

        Returns the ``(popt, pcov)`` pair from ``optimize.curve_fit``. When
        ``plot`` is true, ``zipf.png`` and ``zipf_log.png`` are also written
        to the logging directory.
        """
        print('-------------')
        histogram = nx.degree_histogram(self.G)
        total = float(sum(histogram))  # hoisted: invariant across the loop
        x, y = [], []
        for k, count in enumerate(histogram):
            if count != 0:
                x.append(k)
                y.append(count / total)
        xdata = np.array(x)
        ydata = np.array(y)
        fita, fitb = optimize.curve_fit(powerLaw, xdata, ydata)
        print(fita, fitb)
        if not plot:
            return fita, fitb

        label = 'y=' + "{:.3f}".format(fita[1]) + '*x^-' + "{:.3f}".format(fita[0])

        fig = plt.figure()
        plt.title("Degree distribution curve fitting\n")
        matplotlib.rc('xtick', labelsize=30)
        matplotlib.rc('ytick', labelsize=30)
        plt.text(max(xdata) * 0.4, max(ydata) * 0.4, label, ha='center')
        plt.plot(xdata, ydata, '.')
        plt.xlabel('k(rank order)')
        plt.ylabel('p(k)')
        plt.savefig(self.logging + '/zipf.png')
        # Close the figure just created; close(0) targets figure number 0,
        # which pyplot never allocates on its own, so figures would pile up.
        plt.close(fig)

        fig = plt.figure()
        plt.title("Degree distribution curve fitting (log)\n")
        plt.text(max(xdata) * 0.4, max(ydata) * 0.4, label, ha='center')
        plt.xlabel('k(rank order)')
        plt.ylabel('p(k)')
        plt.loglog(xdata, ydata, '.')
        plt.savefig(self.logging + '/zipf_log.png')
        plt.close(fig)
        return fita, fitb

    def zipf_coeffi(self, plot=True):
        """Average the clustering coefficient of ``uG`` per node degree.

        Returns ``np.array(texthelper.sortDict(..., By="key"))`` built from
        the ``degree -> mean clustering`` mapping. When ``plot`` is true a
        ``powerLaw`` fit is drawn into ``zipf_coeffi.png`` and
        ``zipf_coeffi_log.png``.
        """
        by_degree = {}
        for node in self.uG.nodes():
            by_degree.setdefault(self.uG.degree(node), []).append(node)

        zipf_coeffi = {}
        for deg, members in by_degree.items():
            total = 0
            for node in members:
                total += nx.clustering(self.uG, node)
            zipf_coeffi[deg] = total / len(members)
        zipf_coeffi = np.array(texthelper.sortDict(zipf_coeffi, By="key"))
        if not plot:
            return zipf_coeffi

        xdata = zipf_coeffi[:, 0]
        ydata = zipf_coeffi[:, 1]
        fita, fitb = optimize.curve_fit(powerLaw, xdata, ydata)
        label = 'y=' + "{:.2f}".format(fita[1]) + '*x^-' + "{:.2f}".format(fita[0])

        fig = plt.figure()
        plt.title("Degree-Clustering distribution curve fitting\n")
        plt.text(max(xdata) * 0.4, max(ydata) * 0.4, label, ha='center')
        plt.plot(xdata, ydata, '.')
        plt.xlabel('k')
        plt.ylabel('clustering')
        plt.savefig(self.logging + '/zipf_coeffi.png')
        plt.close(fig)  # close this figure, not figure number 0

        fig = plt.figure()
        plt.text(max(xdata) * 0.4, max(ydata) * 0.4, label, ha='center')
        plt.title("Degree-Clustering distribution curve fitting (log)\n")
        plt.loglog(xdata, ydata, '.')
        plt.xlabel('log(k)')
        plt.ylabel('log(clustering)')
        plt.savefig(self.logging + '/zipf_coeffi_log.png')
        plt.close(fig)
        return zipf_coeffi

    def record(self, additional=True):
        """Write basic counts, and optionally graph metrics, to ``info.txt``.

        Diameter, radius and average shortest path length are only written
        when the undirected graph has a single connected component.
        """
        with open(self.logging + '/info.txt', 'w') as f:
            f.write(" Number of nodes :" + str(len(self.nodes)) + '\n')
            f.write(" Number of edges :" + str(len(self.edges)) + '\n')
            f.write(" Number of samples :" + str(self.samplesCnt) + '\n')
            if additional:
                uG = nx.Graph(self.G)
                connectedCnt = nx.number_connected_components(uG)
                f.write(" number_connected_components :" + str(connectedCnt) + '\n')
                if connectedCnt == 1:
                    f.write(" Diameter :" + str(nx.diameter(uG)) + '\n')
                    f.write(" Radius :" + str(nx.radius(uG)) + '\n')
                    f.write(" average_shortest_path_length :"
                            + str(nx.average_shortest_path_length(uG)) + '\n')
                f.write(" Density :" + str(nx.density(uG)) + '\n')
                f.write(" average_clustering :" + str(nx.average_clustering(uG)) + '\n')
                f.write(" node_connectivity :" + str(nx.node_connectivity(self.G)) + '\n')
                f.write(" global_efficiency :" + str(nx.global_efficiency(uG)) + '\n')

    @property
    def nodes(self):
        """List of the nodes of ``G``."""
        return list(self.G.nodes)

    @property
    def nodeCnt(self):
        """Number of nodes in ``G``."""
        return len(self.G.nodes)

    @property
    def samplesCnt(self):
        """Number of samples."""
        return len(self.samples)

    @property
    def edges(self):
        """List of the distinct relation attributes (not the graph edges)."""
        return list(self.attrs)

    @property
    def edgeCnt(self):
        """Number of distinct relation attributes."""
        return len(self.attrs)
class GraphMes:
    """Build a relation graph from (head, relation, tail) samples and sample windows.

    The instance holds a directed multigraph ``G`` whose edges carry the
    relation in their ``'attr'`` attribute, an undirected simple view ``uG``,
    the list of samples, and node/relation <-> integer id mappings.

    NOTE(review): another class with the same name is defined earlier in this
    file; at import time this later definition replaces it.
    """

    def __init__(self, graph=None, file=None, start=0, ints=False):
        """Load the graph either from ``file`` or from an existing ``graph``.

        Args:
            graph: A networkx graph whose edges carry an ``'attr'`` attribute.
            file: Path handed to ``DataHelper`` to read the samples.
            start: First integer id used by the node and relation mappings.
            ints: When true, every sample field is converted with ``int``.

        Raises:
            ValueError: Unless exactly one of ``graph`` and ``file`` is given.
        """
        if graph is None and file is not None:
            self.helper = DataHelper(file, NP=False)
            self.samples = self.helper.GetSamples()
            if ints:
                self.samples = np.array(
                    [[int(s[0]), int(s[1]), int(s[2])] for s in self.samples]
                )
            self.G = self.readGraph(file, ints=ints)
            self.uG = self.readGraph(file, ints=ints, unweight=True)
        elif graph is not None and file is None:
            self.G = graph
            self.uG = nx.Graph(self.G)
            # edges(data=True) yields each (parallel) edge exactly once.
            # Iterating G.edges() and then every key of G[u][v] would emit
            # k*k samples for a node pair joined by k parallel edges.
            self.samples = [
                [head, data['attr'], tail]
                for head, tail, data in self.G.edges(data=True)
            ]
        else:
            raise ValueError("exactly one of `graph` and `file` must be given")

        self.start = start
        self.node2id, self.id2node = self._node2id()
        self.edge2id, self.id2edge = self._edge2id()

    def readGraph(self, sf, ints=False, unweight=False):
        """Build a graph from ``self.samples``.

        ``sf`` and ``ints`` are accepted for interface compatibility but are
        not used here. Returns an undirected ``nx.Graph`` without attributes
        when ``unweight`` is true, otherwise an ``nx.MultiDiGraph`` whose
        edges store the relation under ``'attr'``.
        """
        self.SamplesCnt = len(self.samples)
        if unweight:
            G = nx.Graph()
            for sample in self.samples:
                G.add_edge(sample[0], sample[2])
        else:
            G = nx.MultiDiGraph()
            for sample in self.samples:
                G.add_edge(sample[0], sample[2], attr=sample[1])
        return G

    def graph2id(self, of):
        """Write every sample to ``of`` as ``<head id> <relation id> <tail id>``."""
        with open(of, 'w') as f:
            for h, r, t in self.samples:
                f.write(
                    str(self.node2id[h]) + ' '
                    + str(self.edge2id[r]) + ' '
                    + str(self.node2id[t]) + '\n'
                )

    def _node2id(self):
        """Return ``(node -> id, id -> node)`` with ids counted from ``self.start``."""
        node2id = {}
        id2node = {}
        for offset, node in enumerate(self.G.nodes()):
            node2id[node] = self.start + offset
            id2node[self.start + offset] = node
        return node2id, id2node

    def _edge2id(self):
        """Return ``(relation -> id, id -> relation)`` and fill ``self.attrs``.

        Ids are assigned in first-seen edge order rather than set iteration
        order, so the mapping does not depend on string hash randomisation.
        """
        self.attrs = set()
        ordered = []
        for _, _, data in self.G.edges(data=True):
            attr = data['attr']
            if attr not in self.attrs:
                self.attrs.add(attr)
                ordered.append(attr)

        edge2id = {}
        id2edge = {}
        for offset, attr in enumerate(ordered):
            edge2id[attr] = self.start + offset
            id2edge[self.start + offset] = attr
        return edge2id, id2edge

    def id2file(self, nodefn, edgefn):
        """Dump the id mappings as ``<name> <id>`` lines, ordered by id.

        Iterates over the ids that actually exist, so a non-zero ``start``
        works, and stringifies the names so integer nodes/relations work too.
        """
        with open(nodefn, 'w') as nf:
            for idx in sorted(self.id2node):
                nf.write(str(self.id2node[idx]) + ' ' + str(idx) + '\n')
        with open(edgefn, 'w') as ef:
            for idx in sorted(self.id2edge):
                ef.write(str(self.id2edge[idx]) + ' ' + str(idx) + '\n')

    def _update_margin(self, searched, margin):
        """Advance the search frontier ``margin`` by one hop, in place.

        Every neighbour (via ``G.neighbors``) of a frontier node that is not
        yet in ``searched`` joins ``margin``; the old frontier nodes then
        move from ``margin`` to ``searched``. If the frontier runs dry, one
        random not-yet-searched node is added so the caller can continue in
        another part of the graph. ``margin`` stays empty only when every
        node has been searched.
        """
        frontier = list(margin)
        for node in frontier:
            for neighbour in self.G.neighbors(node):
                if neighbour not in searched:
                    margin.add(neighbour)
        for node in frontier:
            margin.remove(node)
            searched.add(node)

        if not margin:
            # Draw an actual node object rather than a bare integer, and only
            # from nodes that can still contribute something new.
            remaining = [n for n in self.G.nodes() if n not in searched]
            if remaining:
                margin.add(remaining[np.random.randint(len(remaining))])

    def cohesive(self, windowSize):
        """Grow a set of up to ``windowSize`` nodes around a random centre.

        Starting from a uniformly chosen node, the frontier is expanded hop
        by hop; within each frontier, nodes are added in decreasing
        eigenvector centrality (computed on ``uG``) until the window is full.
        The window never exceeds the number of nodes in the graph, so a
        ``windowSize`` larger than the graph returns all nodes instead of
        looping forever.
        """
        all_bs = nx.eigenvector_centrality(self.uG)
        candidates = self.nodes
        # randint's upper bound is exclusive, so len(candidates) covers every
        # index; indexing the node list keeps this valid for any node labels.
        center = candidates[np.random.randint(len(candidates))]
        windows = {center}
        margin = {center}
        searched = {center}

        target = min(windowSize, len(candidates))
        while len(windows) < target:
            self._update_margin(searched, margin)
            margin_bs = {node: all_bs[node] for node in margin}
            ranked = texthelper.sortDict(margin_bs, By="value", reverse=True)
            for entry in ranked:
                windows.add(entry[0])
                if len(windows) >= windowSize:
                    break
        return windows

    @property
    def nodes(self):
        """List of the nodes of ``G``."""
        return list(self.G.nodes)

    @property
    def nodeCnt(self):
        """Number of nodes in ``G``."""
        return len(self.G.nodes)

    @property
    def samplesCnt(self):
        """Number of samples."""
        return len(self.samples)

    @property
    def edges(self):
        """List of the distinct relation attributes (not the graph edges)."""
        return list(self.attrs)

    @property
    def edgeCnt(self):
        """Number of distinct relation attributes."""
        return len(self.attrs)