def TreeBN3(mTrain, mTest):
    """Fit a tree-structured model on ``mTrain`` and score ``mTest``.

    Single-column and pairwise probabilities are estimated from the training
    rows with add-one smoothing.  A spanning tree that maximises the summed
    pairwise mutual information is then selected (minimum spanning tree over
    negated mutual information), and the test rows are scored under the
    resulting tree factorisation.

    Column values are only ever compared against 0 and 1, and the (1, 1) cell
    is taken as the remainder, so the inputs are presumably 0/1 matrices
    -- TODO confirm against callers.

    Returns:
        The test-set log-likelihood divided by the number of test rows.
    """

    def _joint_counts(data, a, b):
        # Number of rows in which columns (a, b) equal (0,0), (0,1), (1,0).
        n00 = np.logical_and(data[:, a] == 0, data[:, b] == 0).sum()
        n01 = np.logical_and(data[:, a] == 0, data[:, b] == 1).sum()
        n10 = np.logical_and(data[:, a] == 1, data[:, b] == 0).sum()
        return n00, n01, n10

    n_rows = mTrain.shape[0]
    n_cols = mTrain.shape[1]

    # Smoothed probability that each column equals 1.
    positives = mTrain.sum(axis=0)
    prob = (positives + 1) / (n_rows + 2)

    # Smoothed joint table per column pair, keyed by (a, b) with a < b and
    # stored as (p00, p01, p10, p11).
    pair_probs = {}
    for a in range(n_cols):
        for b in range(a + 1, n_cols):
            n00, n01, n10 = _joint_counts(mTrain, a, b)
            pair_probs[(a, b)] = (
                (n00 + 1) / (n_rows + 4),
                (n01 + 1) / (n_rows + 4),
                (n10 + 1) / (n_rows + 4),
                (n_rows - n00 - n01 - n10 + 1) / (n_rows + 4),
            )

    # cycle_graph seeds the nodes 0..m-1; every column pair then receives an
    # edge weighted by its negated mutual information.
    graph = nx.cycle_graph(n_cols)
    for (a, b), cell in pair_probs.items():
        log_pa = np.log(prob[a])
        log_qa = np.log(1 - prob[a])
        log_pb = np.log(prob[b])
        log_qb = np.log(1 - prob[b])
        info = (cell[0] * (np.log(cell[0]) - log_qa - log_qb)
                + cell[1] * (np.log(cell[1]) - log_qa - log_pb)
                + cell[2] * (np.log(cell[2]) - log_pa - log_qb)
                + cell[3] * (np.log(cell[3]) - log_pa - log_pb))
        graph.add_edge(a, b, weight=-info)

    tree_edges = tree.minimum_spanning_edges(graph, algorithm='kruskal',
                                             data=False)

    # Test log-likelihood: pairwise terms over the tree edges ...
    n_test = mTest.shape[0]
    total = 0
    degree = np.zeros(n_cols, dtype='int')
    for a, b in tree_edges:
        degree[a] += 1
        degree[b] += 1
        n00, n01, n10 = _joint_counts(mTest, a, b)
        n11 = n_test - n00 - n01 - n10
        cell = pair_probs[(a, b)]
        total += n00*np.log(cell[0]) + n01*np.log(cell[1]) + n10*np.log(cell[2]) + n11*np.log(cell[3])

    # ... minus the single-column terms, each counted (degree - 1) times.
    test_positives = mTest.sum(axis=0)
    total -= np.log(prob).dot(np.multiply(test_positives, degree - 1)) + np.log(1 - prob).dot(np.multiply(n_test - test_positives, degree - 1))
    return total / n_test
def __init__(self, D):
    """Build the sorted minimum-spanning-tree edge list of ``D`` and run the fit.

    Args:
        D: square array handed to ``nx.from_numpy_array``; its first
            dimension gives the number of points ``self.n``.  Presumably a
            pairwise distance matrix -- TODO confirm against callers.

    Attributes set:
        n: number of rows of ``D``.
        edgelist: MST edges as ``(u, v, weight)`` tuples, sorted by weight,
            then by ``u``, then by ``v``.
        UF: dict mapping each singleton tuple ``(d,)`` to ``0.0``
            (original note: union-find with "prev function images").
        full: same initial content as ``UF`` (original note: union-find
            without removing elements, kept for plotting).
        output: empty list, presumably filled by ``_fit`` -- TODO confirm.
    """
    self.n = D.shape[0]
    G = nx.from_numpy_array(D)
    mst = tree.minimum_spanning_edges(G, algorithm="kruskal", data=True)
    # The tie-break previously used the constant list ``[1]`` instead of the
    # second endpoint, so edges with equal (weight, u) were left in whatever
    # order Kruskal produced them.  Sort on (weight, u, v) for a fully
    # deterministic order.
    self.edgelist = sorted(
        ((u, v, attrs['weight']) for u, v, attrs in mst),
        key=lambda edge: (edge[2], edge[0], edge[1]),
    )
    self.UF = {(d,): 0.0 for d in range(self.n)}
    self.full = {(d,): 0.0 for d in range(self.n)}
    self.output = []
    self._fit()
def _make_raw_network(self):
    """Build the raw network: servers and stations only, no users.

    Each node receives a distinct random ``'loc'`` inside the
    ``self.length`` x ``self.width`` rectangle.  Servers are linked by the
    minimum spanning tree of their pairwise Euclidean distances, and every
    station is linked to its ``self.server_station_con`` closest servers.

    Returns:
        The resulting ``nx.Graph`` with a ``'weight'`` on every edge.
    """

    def _random_loc():
        # x is drawn before y, both rounded to two decimals.
        return (round(random.uniform(0, self.length), 2),
                round(random.uniform(0, self.width), 2))

    graph = nx.Graph()
    graph.add_nodes_from(self.servers_idx)
    graph.add_nodes_from(self.stations_idx)

    # Give every node its own location, redrawing on collisions.
    taken = set()
    for node in graph.nodes:
        candidate = _random_loc()
        while candidate in taken:
            candidate = _random_loc()
        graph.nodes[node]['loc'] = candidate
        taken.add(candidate)

    # Server backbone: MST of the complete graph over the server nodes.
    backbone = nx.complete_graph(graph.subgraph(self.servers_idx).copy())
    for a, b in backbone.edges:
        backbone.edges[a, b]['weight'] = self._euclidean_dis(
            graph.nodes[a]['loc'], graph.nodes[b]['loc'])
    spanning = tree.minimum_spanning_edges(backbone,
                                           algorithm='kruskal',
                                           weight='weight',
                                           data=True)
    for a, b, attrs in spanning:
        graph.add_edge(a, b, weight=attrs['weight'])

    # Attach each station to its nearest servers.
    for station in self.stations_idx:
        distances = {
            server: self._euclidean_dis(graph.nodes[station]['loc'],
                                        graph.nodes[server]['loc'])
            for server in self.servers_idx
        }
        ranked = sorted(distances.items(), key=itemgetter(1))
        for server, dist in ranked[:self.server_station_con]:
            graph.add_edge(station, server, weight=dist)
    return graph
def approx_TSP(P):
    """Return an approximate best tour of ``P``.

    A graph is built from ``P`` with ``build_graph``, its minimum spanning
    tree is computed, and the tree is walked in depth-first preorder.  The
    graph's nodes are used as indices into ``P``.

    Args:
        P: indexable collection of points.

    Returns:
        A list with the elements of ``P`` in visiting order.
    """
    G = build_graph(P)
    mst_edges = tree.minimum_spanning_edges(G, algorithm='kruskal', data=False)
    T = nx.Graph()
    T.add_edges_from(mst_edges)
    # Nodes without an MST edge (e.g. the only node of a one-point input) were
    # previously dropped from the tour.  They are added *after* the edges so
    # the traversal still starts from the same node as before.
    T.add_nodes_from(G)
    return [P[i] for i in nx.dfs_preorder_nodes(T)]
def Kruskals():
    """Animate Kruskal's minimum-spanning-tree edges on the global graph ``G``.

    Prints the MST edge list, shows it in ``Label5``, draws the full graph
    with its edge weights, then highlights the MST edges one at a time with a
    one-second pause between them, and finally writes the edge order as a
    caption below the axes.
    """
    global Graph, visuals, button, toolbar, G, fixed_nodes, fixed_positions, pos, graph
    edgelist = list(
        tree.minimum_spanning_edges(G, algorithm='kruskal', data=False))
    print(edgelist)
    Label5.configure(text=edgelist)

    # Base drawing: all nodes and edges, with weight labels.
    nx.draw(G,
            pos,
            fixed=None,
            with_labels=True,
            node_size=800,
            node_color='skyblue',
            node_shape="s",
            alpha=0.5,
            linewidths=10,
            font_size=8,
            font_weight='bold')
    weight_labels = nx.get_edge_attributes(G, 'weight')
    nx.draw_networkx_edge_labels(G, pos, edge_labels=weight_labels)

    # Highlight the MST edges in the order Kruskal selected them.
    for step, edge in enumerate(edgelist):
        plt.title('Iteration {}'.format(step))
        nx.draw_networkx_edges(G,
                               pos,
                               edgelist=[edge],
                               width=8,
                               alpha=0.5,
                               edge_color='r')
        print("Getting Inside")
        plt.pause(1)

    # Caption below the graph.
    plt.text(0.5,
             -0.1,
             "Kruskal's Edges Order: " + str(edgelist),
             size=12,
             ha="center",
             transform=ax.transAxes)
def checker(path_to_input, path_to_output):
    """Compare a submitted MST weight against the one computed with networkx.

    The input file starts with a line ``n m t`` followed by ``m`` edge lines.
    Types 1 and 3 give ``i j`` per edge (unweighted); types 2 and 4 give
    ``i j k`` with weight ``k``.  Types 3 and 4 build an undirected graph,
    every other type a directed one.  The output file's first line holds the
    submitted answer.

    Args:
        path_to_input: path of the test input file.
        path_to_output: path of the submitted output file.

    Returns:
        ``(user_output, true_output, ok)`` where both dicts carry the answer
        under key ``'n'`` and ``ok`` tells whether they match.  The reference
        answer is the total MST weight, or -1 when the graph is not connected.

    NOTE(review): networkx does not implement ``minimum_spanning_edges`` for
    directed graphs, so the directed types (1 and 2) are expected to raise
    there, as they did before -- confirm whether those types are ever passed.
    """
    true_output = {}
    user_output = {}
    with open(path_to_input, 'r') as f:
        n, m, t = map(int, f.readline().split())
        G = nx.Graph() if t in (3, 4) else nx.DiGraph()
        for _ in range(m):
            if t in (1, 3):
                i, j = map(int, f.readline().split())
                G.add_edge(i, j)
            elif t in (2, 4):
                i, j, k = map(int, f.readline().split())
                G.add_edge(i, j, weight=k)

    mst = list(tree.minimum_spanning_edges(G, algorithm="kruskal", data=True))
    # Unweighted edges carry no 'weight' attribute; the MST routine treats
    # them as weight 1, so sum with the same default instead of raising
    # KeyError.
    true_output['n'] = (sum(x[-1].get('weight', 1) for x in mst)
                        if nx.is_connected(G) else -1)

    with open(path_to_output, 'r') as f:
        output_len = int(f.readline())
    user_output['n'] = output_len
    return user_output, true_output, true_output['n'] == user_output['n']
while (cont < m): aresta = random.sample(range( 0, n), 2) # gera vetor de dois numeros aleatorios entre 0 e n if aresta not in arestas: arestas.append(aresta) cont += 1 g.add_edges_from(arestas) nx.draw(g, with_labels=True, font_weight='bold') plt.show() start = timer() kruskal = tree.minimum_spanning_edges(g, algorithm='kruskal', weight='weight', keys=True, data=True, ignore_nan=False) k = nx.Graph() k.add_edges_from(list(kruskal)) k.add_nodes_from(g.nodes) end = timer() arq.write(f"Tempo de execução Kruskal: {end-start}s\n") nx.draw(k, with_labels=True, font_weight='bold') plt.show() kArestas = k.edges() # arestas que são da AGM gArestas = [aresta for aresta in g.edges() if aresta not in kArestas] # arestas que não são da AGM
def _edge_coordinates(G, Xn, Yn, highlighted=()):
    """Return plot coordinates for all edges and for the highlighted ones.

    ``highlighted`` is an iterable of node pairs; a graph edge is highlighted
    when it joins the same two nodes, regardless of orientation.  Each edge
    contributes ``[start, end, None]`` to the coordinate lists.
    """
    wanted = {frozenset(pair) for pair in highlighted}
    Xe, Ye, Xe2, Ye2 = [], [], [], []
    for u, v in G.edges():
        if frozenset((u, v)) in wanted:
            Xe2.extend([Xn[u], Xn[v], None])
            Ye2.extend([Yn[u], Yn[v], None])
        Xe.extend([Xn[u], Xn[v], None])
        Ye.extend([Yn[u], Yn[v], None])
    return Xe, Ye, Xe2, Ye2


def creategraph(algo, data):
    """Run ``algo`` on the graph described by ``data`` and build a plot dict.

    ``data`` is split on blank lines (``"\\r\\n\\r\\n"``).  Of the resulting
    sections, the second is the node count, the third holds one
    tab-separated row per node (x in field 1, y in field 2), the fourth
    (and every fifth section after it) is passed to ``adjency_matrix``, and
    the fifth is the source node used by the shortest-path algorithms.

    Args:
        algo: one of ``"Prims"``, ``"Kruskals"``, ``"Dijkstra"``,
            ``"Bellman Ford"``, ``"Floyd Warshall"``, ``"Clustering"``.
        data: the raw file content as a string.

    Returns:
        A dict with ``data`` (edge trace, highlighted-edge trace, node
        trace) and ``layout`` (title, axes, annotations).

    Raises:
        ValueError: if ``algo`` is not one of the supported names.
    """
    sections = data.split("\r\n\r\n")
    node_count = int(sections[1])
    source = int(sections[4])
    Ad = adjency_matrix(sections[3::5], node_count)
    # from_numpy_matrix was removed in networkx 3; prefer from_numpy_array
    # and fall back only where it does not exist.
    build_graph_from = getattr(nx, "from_numpy_array",
                               None) or nx.from_numpy_matrix
    G = build_graph_from(Ad)

    rows = sections[2].split("\r\n")
    Xn = []
    Yn = []
    for i in range(node_count):
        fields = rows[i].split("\t")
        Xn.append(float(fields[1]))
        Yn.append(float(fields[2]))
    labels = list(range(node_count))

    highlighted = ()
    if algo in ("Prims", "Kruskals"):
        # The "Kruskals" choice previously ran Prim's algorithm as well.
        method = 'prim' if algo == "Prims" else 'kruskal'
        mst_edges = list(
            tree.minimum_spanning_edges(G, algorithm=method, data=False))
        length = 0
        for u, v in mst_edges:
            length = length + Ad[u][v]
        prefix = "Prims Cost : " if algo == "Prims" else "Kruskal Cost : "
        title = prefix + str(length)
        # Highlight exactly the MST edges (the old test compared endpoints of
        # two different consecutive MST edges).
        highlighted = mst_edges
    elif algo == "Dijkstra":
        length, path = nx.bidirectional_dijkstra(G, source, node_count - 1)
        title = "Dijkstra Cost : " + str(length)
        highlighted = list(zip(path, path[1:]))
    elif algo == "Bellman Ford":
        # Use the source node from the file, as Dijkstra does, rather than a
        # hard-coded node 6.
        length, path = nx.single_source_bellman_ford(G, source,
                                                     node_count - 1)
        title = "Bellman Ford Cost : " + str(length)
        highlighted = list(zip(path, path[1:]))
    elif algo == "Floyd Warshall":
        dist = nx.floyd_warshall_numpy(G)
        # Overwrite the zero diagonal so min() reports the shortest distance
        # between two distinct nodes.
        np.fill_diagonal(dist, dist.max())
        title = "Floyd Warshall Cost : " + str(dist.min())
    elif algo == "Clustering":
        c = nx.average_clustering(G)
        title = "Clustering Cost : " + str(c)
    else:
        raise ValueError("unsupported algorithm: " + repr(algo))

    Xe, Ye, Xe2, Ye2 = _edge_coordinates(G, Xn, Yn, highlighted)

    trace_nodes = dict(type='scatter',
                       x=Xn,
                       y=Yn,
                       mode='markers',
                       marker=dict(size=28, color='peru'),
                       text=labels,
                       hoverinfo='text')
    trace_edges = dict(type='scatter',
                       mode='lines',
                       arrowstyle='->',
                       x=Xe,
                       y=Ye,
                       line=dict(width=1, color='royalblue'),
                       hoverinfo='none')
    trace_edges2 = dict(type='scatter',
                        mode='lines',
                        x=Xe2,
                        y=Ye2,
                        line=dict(width=4.1, color="firebrick"),
                        hoverinfo='none')
    # Axis line, grid and tick labels are shown; zero line and title are not.
    axis = dict(showline=True,
                zeroline=False,
                showgrid=True,
                showticklabels=True,
                title='')
    layout = dict(
        title=title,
        font=dict(family='Balto'),
        showlegend=True,
        xaxis=axis,
        yaxis=axis,
    )
    # One text annotation per node, placed on the node itself.
    annotations = [
        dict(text=labels[k],
             x=Xn[k],
             y=Yn[k],
             xref='x1',
             yref='y1',
             font=dict(color='rgb(10,10,10)', size=14),
             showarrow=False) for k in range(node_count)
    ]
    data = dict(data=[trace_edges, trace_edges2, trace_nodes], layout=layout)
    data['layout'].update(annotations=annotations)
    return data
# Draw the input graph G on a circular layout with its edge weights.
pos = nx.circular_layout(G)
labels = nx.get_edge_attributes(G, 'weight')
print(
    "----------------------------------Output MST----------------------------------"
)
print("Nodes of the Graph:", G.nodes())
print("Edges of the Graph:", G.edges())
nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)
nx.draw(G, pos, with_labels=True, node_color='y')
plt.savefig("Problem_graph2.png")  # save as png
plt.show()

# Kruskal's algorithm: build graph `h` from the MST edges and draw it.
print("*********************************************************************")
print("The Solution using Kruskal Algorithm is shown:")
h = nx.Graph()
final = tree.minimum_spanning_edges(G, algorithm='kruskal', data=True)
edgelist = list(final)
print(sorted(edgelist))
h.add_edges_from(sorted(edgelist))
# NOTE(review): `labels` holds the weights of every edge of G, not only the
# MST edges in `h` -- confirm that labelling all of them here is intended.
nx.draw_networkx_edge_labels(h, pos, edge_labels=labels)
nx.draw(h, pos, with_labels=True, node_color='b')
plt.savefig("kruskal.png")  # save as png
plt.show()

# Prim's algorithm: build graph `j` from the MST edges.
print("*********************************************************************")
print("The Solution using Prims Algorithm is shown:")
j = nx.Graph()
final2 = tree.minimum_spanning_edges(G, algorithm='prim', data=True)
edgelist2 = list(final2)
print(sorted(edgelist2))
j.add_edges_from(sorted(edgelist2))
from networkx.algorithms import tree
import networkx as nx
import matplotlib.pyplot as plt

# Four-node cycle plus one explicitly weighted edge between nodes 0 and 3;
# the other cycle edges carry no 'weight' attribute.
G = nx.cycle_graph(4)
G.add_edge(0, 3, weight=2)
nx.draw_planar(G=G,
               with_labels=True,
               node_color='g',
               node_size=800,
               font_size=14,
               width=0.8)
plt.show()

# Minimum spanning tree edges via Prim's algorithm, printed with each edge's
# endpoints sorted and the edges themselves sorted.
mst = tree.minimum_spanning_edges(G, algorithm="prim", data=False)
edgelist = list(mst)
print(sorted(sorted(e) for e in edgelist))

# Reference:
# https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.tree.mst.minimum_spanning_edges.html
def edgeWeightedGraph(fileName="./" + SUBDIRNAME + "/ewg.png"):
    """Print the MST of a fixed 8-node weighted graph and save a drawing.

    The graph's 16 weighted edges are hard-coded.  The minimum spanning tree
    is computed with Prim's algorithm; each MST edge and its weight is
    printed, followed by the total weight.  The full graph (not only the
    MST) is then drawn on a circular layout with weight labels and saved.

    Args:
        fileName: path of the image file to write.
    """
    print()
    G = nx.Graph()
    G.add_edges_from([
        (0, 7, {'weight': 0.16}),
        (2, 3, {'weight': 0.17}),
        (1, 7, {'weight': 0.19}),
        (0, 2, {'weight': 0.26}),
        (5, 7, {'weight': 0.28}),
        (1, 3, {'weight': 0.29}),
        (1, 5, {'weight': 0.32}),
        (2, 7, {'weight': 0.34}),
        (4, 5, {'weight': 0.35}),
        (1, 2, {'weight': 0.36}),
        (4, 7, {'weight': 0.37}),
        (0, 4, {'weight': 0.38}),
        (6, 2, {'weight': 0.40}),
        (3, 6, {'weight': 0.52}),
        (6, 0, {'weight': 0.58}),
        (6, 4, {'weight': 0.93}),
    ])

    print('Minimum Spanning Tree')
    edgelist = list(
        tree.minimum_spanning_edges(G, algorithm='prim', data=False))

    mstWeight = 0
    print('Edge Weight List')
    for edge in edgelist:
        # Read the weight through the graph's edge view: it accepts either
        # orientation, whereas the get_edge_attributes dict is keyed by one
        # orientation only and raises KeyError when Prim yields the other.
        weight = G.edges[edge]['weight']
        mstWeight += float(weight)
        print(str(list(edge)) + '\t' + str(weight))
    print('Total MST Weight: ' + str(mstWeight))

    labels = nx.get_edge_attributes(G, 'weight')
    pos = nx.circular_layout(G)
    nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)
    nx.draw(G, pos, with_labels=True, font_weight='bold')
    print("Saving file to " + str(fileName))
    plt.savefig(fileName)
    print()
create_using=nx.DiGraph(), nodetype=int) pos = nx.spring_layout(subgraph) print(nx.info(subgraph)) nx.draw(subgraph, pos, node_color='#A0CBE2', edge_color='#00bb5e', width=1, edge_cmap=plt.cm.Blues, with_labels=True) plt.savefig("graph_sample.pdf") #%% mst = tree.minimum_spanning_edges(g, algorithm='prim', data=False) edgelist = list(mst) sorted(edgelist) #%% # No of Unique nodes degree_sequence = list(g.degree()) nb_nodes = len(g.nodes()) nb_arr = len(g.edges()) avg_degree = np.mean(np.array(degree_sequence)[:, 1]) med_degree = np.median(np.array(degree_sequence)[:, 1]) max_degree = max(np.array(degree_sequence)[:, 1]) min_degree = np.min(np.array(degree_sequence)[:, 1])
def MTRF(mTrain, mTest, ntree, perczero):
    """Score ``mTest`` with ``ntree`` randomised trees fit on bootstrap samples.

    For every tree a bootstrap sample of the training rows is drawn,
    single-column and pairwise probabilities are estimated with add-one
    smoothing, and each column pair gets an edge weighted by its negated
    mutual information.  The weights of ``r`` randomly chosen edges are then
    set to 0, the minimum spanning tree is taken, and the test rows are
    scored under the resulting tree factorisation.

    Column values are only compared against 0 and 1, so the inputs are
    presumably 0/1 matrices -- TODO confirm against callers.

    Args:
        mTrain: training matrix (rows are observations).
        mTest: test matrix with the same columns.
        ntree: number of trees to fit.
        perczero: fraction of the ``m*(m-1)/2`` column pairs whose mutual
            information is zeroed; ``r = int(m*(m-1)/2*perczero)``.

    Returns:
        ``(avgScore, medScore)``: mean and median over the trees of the
        test log-likelihood, each divided by the number of test rows.
    """
    k = ntree
    m = mTrain.shape[1]
    n = mTrain.shape[0]
    r = int(m*(m-1)/2*perczero)

    # ind[u] is the row of the pair table holding pair (u, u+1); pair (u, v)
    # with u < v lives at row ind[u] + v - u - 1.
    ind = np.zeros(m, dtype='int')
    id_ = 0
    for u in range(m):
        ind[u] = id_
        id_ += m - u - 1

    nTest = mTest.shape[0]
    cntTest = mTest.sum(axis=0)
    score = np.zeros(k)

    for k_ in range(k):
        boot = np.random.choice(range(n), size=n, replace=True)
        sample = mTrain[boot]

        # Smoothed probability that each column equals 1 in the sample.
        cnt = sample.sum(axis=0)
        prob = (cnt+1)/(n+2)

        Puv = np.zeros((int(m*(m-1)/2), 4))
        id_ = 0
        # cycle_graph seeds nodes 0..m-1; each pair then gets a weighted edge.
        G = nx.cycle_graph(m)
        for u in range(m):
            for v in range(u+1, m):
                c00 = np.logical_and(sample[:, u] == 0, sample[:, v] == 0).sum()
                c01 = np.logical_and(sample[:, u] == 0, sample[:, v] == 1).sum()
                c10 = np.logical_and(sample[:, u] == 1, sample[:, v] == 0).sum()
                p00 = (c00+1)/(n+4)
                p01 = (c01+1)/(n+4)
                p10 = (c10+1)/(n+4)
                p11 = (n-c00-c01-c10+1)/(n+4)
                Puv[id_] = (p00, p01, p10, p11)
                logpu = np.log(prob[u])
                logqu = np.log(1-prob[u])
                logpv = np.log(prob[v])
                logqv = np.log(1-prob[v])
                MI = p00*(np.log(p00)-logqu-logqv) + p01*(np.log(p01)-logqu-logpv) + p10*(np.log(p10)-logpu-logqv) + p11*(np.log(p11)-logpu-logpv)
                G.add_edge(u, v, weight=-MI)
                id_ += 1

        # Zero exactly r distinct edges.  Sampling used to be done with
        # replacement, so duplicates made the number of zeroed edges fall
        # short of r; r is also clamped to the number of edges available.
        edges = list(G.edges)
        zeroset = np.random.choice(len(edges), size=min(r, len(edges)),
                                   replace=False)
        for i in zeroset:
            G[edges[i][0]][edges[i][1]]['weight'] = 0

        mst = tree.minimum_spanning_edges(G, algorithm='kruskal', data=False)
        deg = np.zeros(m, dtype='int')
        for a, b in mst:
            # The pair-table index assumes u < v; normalise the orientation
            # before both the lookup and the (0,1)/(1,0) counts.
            u, v = (a, b) if a < b else (b, a)
            deg[u] += 1
            deg[v] += 1
            c00 = np.logical_and(mTest[:, u] == 0, mTest[:, v] == 0).sum()
            c01 = np.logical_and(mTest[:, u] == 0, mTest[:, v] == 1).sum()
            c10 = np.logical_and(mTest[:, u] == 1, mTest[:, v] == 0).sum()
            c11 = nTest - c00 - c01 - c10
            tmp = Puv[ind[u]+v-u-1]
            score[k_] += c00*np.log(tmp[0]) + c01*np.log(tmp[1]) + c10*np.log(tmp[2]) + c11*np.log(tmp[3])
        # Single-column terms are counted (degree - 1) times too often by the
        # pairwise sum above; remove them.
        score[k_] -= np.log(prob).dot(np.multiply(cntTest, deg-1)) + np.log(1-prob).dot(np.multiply(nTest-cntTest, deg-1))

    avgScore = np.mean(score)/nTest
    medScore = np.median(score)/nTest
    return avgScore, medScore
def _tree_log_likelihood(data, edges, pair_table, pair_index, marginals):
    """Return per-row log-likelihoods of ``data`` under one tree, plus degrees.

    For each row the result is the sum over tree edges of the log pairwise
    probability of the observed pair, minus, for every column, the log
    single-column probability counted ``degree - 1`` times.

    Args:
        data: matrix whose columns are compared against 0 and 1.
        edges: iterable of ``(u, v)`` node pairs.
        pair_table: array of ``(p00, p01, p10, p11)`` rows, one per pair.
        pair_index: ``pair_index[u] + v - u - 1`` is the row of pair
            ``(u, v)`` with ``u < v``.
        marginals: probability that each column equals 1.
    """
    loglik = np.zeros(data.shape[0])
    deg = np.zeros(data.shape[1], dtype='int')
    for a, b in edges:
        # The pair table is laid out for u < v.
        u, v = (a, b) if a < b else (b, a)
        deg[u] += 1
        deg[v] += 1
        log_p = np.log(pair_table[pair_index[u]+v-u-1])
        loglik += np.logical_and(data[:, u] == 0, data[:, v] == 0)*log_p[0]
        loglik += np.logical_and(data[:, u] == 0, data[:, v] == 1)*log_p[1]
        loglik += np.logical_and(data[:, u] == 1, data[:, v] == 0)*log_p[2]
        loglik += np.logical_and(data[:, u] == 1, data[:, v] == 1)*log_p[3]
    loglik -= data.dot(np.multiply(np.log(marginals), deg-1)) + (1-data).dot(np.multiply(np.log(1-marginals), deg-1))
    return loglik, deg


def MTEM(mTrain, mTest, ntree, niter=100, tol=5e-3):
    """Fit a mixture of ``ntree`` trees by EM and score ``mTest``.

    Responsibilities are initialised from random trees that use the pooled
    smoothed training probabilities.  Each EM iteration renormalises the
    responsibilities per component, re-estimates the mixture weights and
    the per-component smoothed single-column and pairwise probabilities,
    picks each component's tree as the minimum spanning tree over negated
    mutual information, and recomputes the training log-likelihood.  The
    loop stops after ``niter`` iterations or once the change in mean
    training log-likelihood is at most ``tol``.

    Column values are only compared against 0 and 1, and the matrices are
    used in dot products, so the inputs are presumably numeric 0/1 matrices
    -- TODO confirm against callers.

    Args:
        mTrain: training matrix (rows are observations).
        mTest: test matrix with the same columns.
        ntree: number of mixture components.
        niter: maximum number of EM iterations (default 100, the previously
            hard-coded value).
        tol: convergence threshold on the score change (default 5e-3, the
            previously hard-coded value).

    Returns:
        The mean test-set log-likelihood per test row under the mixture.
    """
    k = ntree
    m = mTrain.shape[1]
    n = mTrain.shape[0]
    n_pairs = int(m*(m-1)/2)

    # Uniform mixture weights; per-component tables are filled in the M step.
    lam = np.ones(k)/k
    prob = np.ones((k, m)) * 0.5
    Puv = np.zeros((k, n_pairs, 4))
    gamma = np.zeros((k, n))

    # ind[u] is the pair-table row of pair (u, u+1).
    ind = np.zeros(m, dtype='int')
    id_ = 0
    for u in range(m):
        ind[u] = id_
        id_ += m - u - 1

    # Pooled estimates with add-one smoothing, used only for initialisation.
    prob_init = (mTrain.sum(axis=0) + 1)/(n+2)
    Puv_init = np.zeros((n_pairs, 4))
    id_ = 0
    for u in range(m):
        for v in range(u+1, m):
            c00 = np.logical_and(mTrain[:, u] == 0, mTrain[:, v] == 0).sum()
            c01 = np.logical_and(mTrain[:, u] == 0, mTrain[:, v] == 1).sum()
            c10 = np.logical_and(mTrain[:, u] == 1, mTrain[:, v] == 0).sum()
            Puv_init[id_, 0] = (c00+1)/(n+4)
            Puv_init[id_, 1] = (c01+1)/(n+4)
            Puv_init[id_, 2] = (c10+1)/(n+4)
            Puv_init[id_, 3] = (n-c00-c01-c10+1)/(n+4)
            id_ += 1

    # Start every component from a random tree structure.
    for k_ in range(k):
        edges = nx.generators.trees.random_tree(m).edges
        gamma[k_], _ = _tree_log_likelihood(mTrain, edges, Puv_init, ind,
                                            prob_init)

    # lambda_k * T_k(x), then normalise over components per row.
    gamma = np.multiply(lam.reshape(-1, 1), np.exp(gamma))
    gamma = gamma / gamma.sum(axis=0)

    edgeset = np.zeros((k, m-1, 2), dtype='int')  # tree edges per component
    degs = np.zeros((k, m), dtype='int')  # node degrees per component
    it = 0
    score = 0
    dscore = 1
    while it < niter and abs(dscore) > tol:
        score0 = score
        # E step: total responsibility per component, then normalise each
        # component's responsibilities to sum to one over the rows.
        Gam = gamma.sum(axis=1)
        gamma = np.divide(gamma, Gam.reshape(-1, 1))
        # M step: mixture weights.
        lam = Gam/n
        for k_ in range(k):
            # Weighted counts are rescaled by n before add-one smoothing.
            prob[k_] = (np.multiply(gamma[k_].reshape(-1, 1), mTrain).sum(axis=0)*n + 1)/(n+2)
            id_ = 0
            # cycle_graph seeds nodes 0..m-1; each pair gets a weighted edge.
            G = nx.cycle_graph(m)
            for u in range(m):
                for v in range(u+1, m):
                    c00 = gamma[k_][np.logical_and(mTrain[:, u] == 0, mTrain[:, v] == 0)].sum()*n
                    c01 = gamma[k_][np.logical_and(mTrain[:, u] == 0, mTrain[:, v] == 1)].sum()*n
                    c10 = gamma[k_][np.logical_and(mTrain[:, u] == 1, mTrain[:, v] == 0)].sum()*n
                    p00 = (c00+1)/(n+4)
                    p01 = (c01+1)/(n+4)
                    p10 = (c10+1)/(n+4)
                    p11 = 1 - p00 - p01 - p10
                    Puv[k_, id_, 0] = p00
                    Puv[k_, id_, 1] = p01
                    Puv[k_, id_, 2] = p10
                    Puv[k_, id_, 3] = p11
                    logpu = np.log(prob[k_, u])
                    logqu = np.log(1-prob[k_, u])
                    logpv = np.log(prob[k_, v])
                    logqv = np.log(1-prob[k_, v])
                    MI = p00*(np.log(p00)-logqu-logqv) + p01*(np.log(p01)-logqu-logpv) + p10*(np.log(p10)-logpu-logqv) + p11*(np.log(p11)-logpu-logpv)
                    G.add_edge(u, v, weight=-MI)
                    id_ += 1

            mst = list(tree.minimum_spanning_edges(G, algorithm='kruskal',
                                                   data=False))
            for idx, (u, v) in enumerate(mst):
                edgeset[k_, idx, 0] = u
                edgeset[k_, idx, 1] = v
            # The responsibilities of component k_ are no longer needed once
            # its tables are built; the row is reused for its log-likelihood.
            gamma[k_], degs[k_] = _tree_log_likelihood(mTrain, mst, Puv[k_],
                                                       ind, prob[k_])

        gamma = np.multiply(lam.reshape(-1, 1), np.exp(gamma))
        score = np.log(gamma.sum(axis=0)).sum()/n
        dscore = score - score0
        it += 1
        # Posterior probability of the hidden component for every row.
        gamma = gamma / gamma.sum(axis=0)

    # Report on the stopping criterion itself instead of comparing the
    # iteration count with a literal 100.
    if abs(dscore) <= tol:
        print('\tEM converges in ', it, ' iteration. score = ', score)
    else:
        print('\tEM can not converges in ', it, ' iteration. score = ', score)

    # Test-set log-likelihood under the fitted mixture.
    nTest = mTest.shape[0]
    Lik = np.zeros((k, nTest))
    for k_ in range(k):
        Lik[k_], _ = _tree_log_likelihood(mTest, edgeset[k_], Puv[k_], ind,
                                          prob[k_])
    Lik = np.multiply(lam.reshape(-1, 1), np.exp(Lik))
    score = np.log(Lik.sum(axis=0)).sum()/nTest
    return score
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms import tree

# Part 1
# Create the graph and its weighted edges.
G = nx.Graph()
G.add_edge(0, 2, weight=49)
G.add_edge(0, 4, weight=43)
G.add_edge(1, 3, weight=31)
G.add_edge(1, 2, weight=56)
# Compute the minimum spanning tree with Kruskal's algorithm
# (original note: edge list in ascending order).
mst = tree.minimum_spanning_edges(G, algorithm='kruskal', data=True)
print("tipo", type(mst))
edgelist = list(mst)
# Create a complete graph on 5 nodes whose edges start without a weight.
G_complete = nx.complete_graph(5)
GrafoDioCane = nx.Graph()
# Copy the weight of every MST edge onto the same edge of the complete graph.
for i in edgelist:
    a = int(i[0])
    b = int(i[1])
    G_complete[a][b]['weight'] = i[2].get("weight")
# Original note: "edge list in descending order".
# NOTE(review): the list is printed as produced above, without re-sorting.
print(edgelist)
from networkx.algorithms import tree
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

sns.set()
# NOTE(review): `nx` is used below but is not imported in the visible lines --
# confirm it is imported earlier in the file.
G = nx.Graph()
sub_G = nx.Graph()
# Node table and edge-cost table, read from Excel.
reader_points = pd.read_excel(
    r'E:\learning_pymc3\Thesis_Code\Prim_Algorithm\kashan\kashan_points.xlsx')
reader_cost = pd.read_excel(
    r'E:\learning_pymc3\Thesis_Code\Prim_Algorithm\kashan\kashan_cost.xlsx')
matrix_cost = np.array(reader_cost)
matrix_points = np.array(reader_points)
# One node per row of the points table; columns 1 and 2 give its position.
for i in range(matrix_points.shape[0]):
    G.add_node(i, pos=(matrix_points[i, 1], matrix_points[i, 2]))
# One edge per row of the cost table: columns 0 and 1 are the endpoints,
# column 2 is the weight.
for i in range(matrix_cost.shape[0]):
    G.add_edge(matrix_cost[i, 0], matrix_cost[i, 1], weight=matrix_cost[i, 2])
pos = nx.get_node_attributes(G, 'pos')
weight = nx.get_edge_attributes(G, 'weight')
# Minimum spanning tree edges via Prim's algorithm.
mst = list(tree.minimum_spanning_edges(G, algorithm='prim', data=False))
print(list(mst))
# sub_G holds the same nodes as G but only the MST edges.
for i in range(matrix_points.shape[0]):
    sub_G.add_node(i, pos=(matrix_points[i, 1], matrix_points[i, 2]))
sub_G.add_edges_from(mst)
# Draw the whole network, then overlay the MST edges in thick red.
nx.draw_networkx(G, pos)
#nx.draw_networkx_edge_labels(G,pos,edge_labels=weight,font_color='brown')
nx.draw_networkx_edges(G, pos, edgelist=mst, edge_color='r', width=5)
plt.savefig("Prim_network.jpg", dpi=300, bbox_inches='tight')
def TreeBN2(mTrain, mTest):
    """Fit a tree-structured model on ``mTrain`` and score ``mTest``.

    Single-column and pairwise probabilities are estimated from the training
    rows with add-one smoothing, the pairwise tables being stored in a flat
    array with one row per column pair.  A spanning tree that maximises the
    summed pairwise mutual information is selected (minimum spanning tree
    over negated mutual information) and the test rows are scored under the
    resulting tree factorisation.

    Column values are only ever compared against 0 and 1, so the inputs are
    presumably 0/1 matrices -- TODO confirm against callers.

    Returns:
        The test-set log-likelihood divided by the number of test rows.
    """

    def _joint_counts(data, a, b):
        # Number of rows in which columns (a, b) equal (0,0), (0,1), (1,0).
        n00 = np.logical_and(data[:, a] == 0, data[:, b] == 0).sum()
        n01 = np.logical_and(data[:, a] == 0, data[:, b] == 1).sum()
        n10 = np.logical_and(data[:, a] == 1, data[:, b] == 0).sum()
        return n00, n01, n10

    n_rows = mTrain.shape[0]
    n_cols = mTrain.shape[1]

    # Smoothed probability that each column equals 1.
    positives = mTrain.sum(axis=0)
    prob = (positives + 1) / (n_rows + 2)

    # Row `first_row[a] + b - a - 1` of the table holds pair (a, b), a < b,
    # as (p00, p01, p10, p11).
    pair_table = np.zeros((int(n_cols*(n_cols-1)/2), 4))
    first_row = np.zeros(n_cols, dtype='int')
    row = 0

    # cycle_graph seeds the nodes 0..m-1; every column pair then receives an
    # edge weighted by its negated mutual information.
    graph = nx.cycle_graph(n_cols)
    for a in range(n_cols):
        first_row[a] = row
        log_pa = np.log(prob[a])
        log_qa = np.log(1 - prob[a])
        for b in range(a + 1, n_cols):
            n00, n01, n10 = _joint_counts(mTrain, a, b)
            q00 = (n00 + 1) / (n_rows + 4)
            q01 = (n01 + 1) / (n_rows + 4)
            q10 = (n10 + 1) / (n_rows + 4)
            q11 = (n_rows - n00 - n01 - n10 + 1) / (n_rows + 4)
            pair_table[row, 0] = q00
            pair_table[row, 1] = q01
            pair_table[row, 2] = q10
            pair_table[row, 3] = q11
            log_pb = np.log(prob[b])
            log_qb = np.log(1 - prob[b])
            info = (q00 * (np.log(q00) - log_qa - log_qb)
                    + q01 * (np.log(q01) - log_qa - log_pb)
                    + q10 * (np.log(q10) - log_pa - log_qb)
                    + q11 * (np.log(q11) - log_pa - log_pb))
            graph.add_edge(a, b, weight=-info)
            row += 1

    tree_edges = tree.minimum_spanning_edges(graph, algorithm='kruskal',
                                             data=False)

    # Test log-likelihood: pairwise terms over the tree edges ...
    n_test = mTest.shape[0]
    total = 0
    degree = np.zeros(n_cols, dtype='int')
    for a, b in tree_edges:
        degree[a] += 1
        degree[b] += 1
        n00, n01, n10 = _joint_counts(mTest, a, b)
        n11 = n_test - n00 - n01 - n10
        cell = pair_table[first_row[a] + b - a - 1]
        total += n00*np.log(cell[0]) + n01*np.log(cell[1]) + n10*np.log(cell[2]) + n11*np.log(cell[3])

    # ... minus the single-column terms, each counted (degree - 1) times.
    test_positives = mTest.sum(axis=0)
    total -= np.log(prob).dot(np.multiply(test_positives, degree - 1)) + np.log(1 - prob).dot(np.multiply(n_test - test_positives, degree - 1))
    return total / n_test