Пример #1
0
def TreeBN3(mTrain, mTest):
    """Fit a tree-structured model on 0/1 data and score it on a test set.

    Single-column and pairwise probabilities are estimated from ``mTrain``
    with add-one smoothing.  A spanning tree is then selected by running
    Kruskal on the negated mutual-information weights, and the test
    log-likelihood under that tree is returned, divided by the number of
    test rows.

    Both arguments are indexed as 2-D arrays (rows, columns) whose entries
    are compared against 0 and 1.
    """
    n_rows, n_cols = mTrain.shape[0], mTrain.shape[1]

    # Smoothed estimate of P(x_v = 1) for every column.
    p_one = (mTrain.sum(axis=0) + 1) / (n_rows + 2)

    # Smoothed joint table for every column pair (a < b), stored in the
    # order (00, 01, 10, 11).
    denom = n_rows + 4
    joint = {}
    for a in range(n_cols):
        a_is0 = mTrain[:, a] == 0
        a_is1 = mTrain[:, a] == 1
        for b in range(a + 1, n_cols):
            b_is0 = mTrain[:, b] == 0
            b_is1 = mTrain[:, b] == 1
            k00 = np.logical_and(a_is0, b_is0).sum()
            k01 = np.logical_and(a_is0, b_is1).sum()
            k10 = np.logical_and(a_is1, b_is0).sum()
            # the 11 count is whatever remains of the n_rows observations
            k11 = n_rows - k00 - k01 - k10
            joint[(a, b)] = ((k00 + 1) / denom,
                             (k01 + 1) / denom,
                             (k10 + 1) / denom,
                             (k11 + 1) / denom)

    # Every pair gets an edge weighted by minus its mutual information, so
    # the minimum spanning tree maximises total mutual information.
    graph = nx.cycle_graph(n_cols)
    for (a, b), (p00, p01, p10, p11) in joint.items():
        log_pa = np.log(p_one[a])
        log_qa = np.log(1 - p_one[a])
        log_pb = np.log(p_one[b])
        log_qb = np.log(1 - p_one[b])
        mi = (p00 * (np.log(p00) - log_qa - log_qb)
              + p01 * (np.log(p01) - log_qa - log_pb)
              + p10 * (np.log(p10) - log_pa - log_qb)
              + p11 * (np.log(p11) - log_pa - log_pb))
        graph.add_edge(a, b, weight=-mi)

    tree_edges = tree.minimum_spanning_edges(graph, algorithm='kruskal', data=False)

    # Test-set log-likelihood: pairwise terms along the tree edges ...
    nTest = mTest.shape[0]
    score = 0
    deg = np.zeros(n_cols, dtype='int')
    for edge in tree_edges:
        a, b = edge[0], edge[1]
        deg[a] += 1
        deg[b] += 1
        a_is0 = mTest[:, a] == 0
        a_is1 = mTest[:, a] == 1
        b_is0 = mTest[:, b] == 0
        b_is1 = mTest[:, b] == 1
        t00 = np.logical_and(a_is0, b_is0).sum()
        t01 = np.logical_and(a_is0, b_is1).sum()
        t10 = np.logical_and(a_is1, b_is0).sum()
        t11 = nTest - t00 - t01 - t10
        table = joint[(a, b)]
        score += (t00 * np.log(table[0]) + t01 * np.log(table[1])
                  + t10 * np.log(table[2]) + t11 * np.log(table[3]))

    # ... minus the single-column terms, each counted (degree - 1) times.
    ones = mTest.sum(axis=0)
    excess = deg - 1
    score -= (np.log(p_one).dot(np.multiply(ones, excess))
              + np.log(1 - p_one).dot(np.multiply(nTest - ones, excess)))

    return score / nTest
Пример #2
0
 def __init__(self, D):
     """Build the minimum spanning tree of ``D`` and run the fit.

     ``D.shape[0]`` is taken as the number of points and
     ``nx.from_numpy_array`` turns ``D`` into a weighted graph
     (presumably ``D`` is a square distance matrix; confirm with callers).
     The sorted MST edge list, the two union-find dictionaries and the
     output list are initialised before ``self._fit()`` is called.
     """
     self.n = D.shape[0]
     G = nx.from_numpy_array(D)
     mst = tree.minimum_spanning_edges(G, algorithm="kruskal", data=True)
     # Order the MST edges by weight, breaking ties on both endpoints so the
     # order is deterministic.  The key used to end in the constant ``[1]``
     # rather than ``x[1]``, so the second endpoint was never compared.
     self.edgelist = sorted(((u, v, attrs['weight']) for u, v, attrs in mst),
                            key=lambda x: (x[2], x[0], x[1]))
     self.UF = {(d,):0.0 for d in range(self.n)} #Union-Find with prev function images
     self.full = {(d,):0.0 for d in range(self.n)} #Union-Find without removing elements (for plotting)
     self.output = []
     self._fit()
Пример #3
0
    def _make_raw_network(self):  # TODO make raw network
        """Build the raw network: servers and stations only, no users.

        Every node receives a distinct random ``'loc'`` inside the
        ``self.length`` x ``self.width`` rectangle.  Servers are linked by
        the minimum spanning tree of their pairwise distances, and each
        station is linked to its ``self.server_station_con`` nearest
        servers.  All edges carry the distance as ``'weight'``.
        """
        network = nx.Graph()
        network.add_nodes_from(self.servers_idx)
        network.add_nodes_from(self.stations_idx)

        # draw locations until an unused one comes up, node by node
        taken = set()
        for node in network.nodes:
            loc = None
            while loc is None or loc in taken:
                loc = (round(random.uniform(0, self.length), 2),
                       round(random.uniform(0, self.width), 2))
            network.nodes[node]['loc'] = loc
            taken.add(loc)

        # server backbone: spanning tree over the complete server graph
        backbone = nx.complete_graph(
            network.subgraph(self.servers_idx).copy())
        for u, v in backbone.edges:
            backbone.edges[u, v]['weight'] = self._euclidean_dis(
                network.nodes[u]['loc'], network.nodes[v]['loc'])
        spanning = tree.minimum_spanning_edges(backbone,
                                               algorithm='kruskal',
                                               weight='weight',
                                               data=True)
        for u, v, attrs in spanning:
            network.add_edge(u, v, weight=attrs['weight'])

        # attach each station to its closest servers
        for station in self.stations_idx:
            station_loc = network.nodes[station]['loc']
            dist_by_server = {
                server: self._euclidean_dis(station_loc,
                                            network.nodes[server]['loc'])
                for server in self.servers_idx
            }
            nearest = sorted(dist_by_server.items(), key=itemgetter(1))
            for server, dist in nearest[:self.server_station_con]:
                network.add_edge(station, server, weight=dist)
        return network
Пример #4
0
def approx_TSP(P):
    """Return an approximate best tour of ``P``.

    The minimum spanning tree of ``build_graph(P)`` is computed with
    Kruskal and walked in depth-first preorder; the points of ``P`` are
    returned in visit order.

    Nodes that have no spanning-tree edge (a single-point input, or nodes
    isolated in the graph) are included as well, so the tour never drops a
    point.
    """
    G = build_graph(P)
    mst_edges = tree.minimum_spanning_edges(G, algorithm='kruskal', data=False)

    T = nx.Graph()
    T.add_edges_from(mst_edges)
    # Added after the edges so the traversal order of the tree itself is
    # unchanged; only nodes the tree missed are appended.
    T.add_nodes_from(G.nodes)

    # assumes the graph's nodes are indices into P, as the lookup requires
    return [P[i] for i in nx.dfs_preorder_nodes(T)]
Пример #5
0
def Kruskals():
    """Animate the Kruskal spanning-tree edges of the global graph ``G``.

    The edge list is printed and written to ``Label5``, the full graph is
    drawn with its weight labels, then the tree edges are overlaid in red
    one per second.  A caption listing the edge order is placed below the
    axes at the end.
    """
    global Graph, visuals, button, toolbar, G, fixed_nodes, fixed_positions, pos, graph
    edgelist = list(
        tree.minimum_spanning_edges(G, algorithm='kruskal', data=False))
    print(edgelist)
    Label5.configure(text=edgelist)

    # base picture: the whole graph plus weight labels
    node_style = dict(fixed=None,
                      with_labels=True,
                      node_size=800,
                      node_color='skyblue',
                      node_shape="s",
                      alpha=0.5,
                      linewidths=10,
                      font_size=8,
                      font_weight='bold')
    nx.draw(G, pos, **node_style)
    nx.draw_networkx_edge_labels(
        G, pos, edge_labels=nx.get_edge_attributes(G, 'weight'))

    # overlay the tree edges one at a time
    for step, edge in enumerate(edgelist):
        plt.title('Iteration {}'.format(step))
        nx.draw_networkx_edges(G,
                               pos,
                               edgelist=[edge],
                               width=8,
                               alpha=0.5,
                               edge_color='r')
        print("Getting Inside")
        plt.pause(1)

    # caption below the graph
    plt.text(0.5,
             -0.1,
             "Kruskal's Edges Order: " + str(edgelist),
             size=12,
             ha="center",
             transform=ax.transAxes)
Пример #6
0
def checker(path_to_input, path_to_output):
    """Compare a submitted spanning-tree weight with the networkx result.

    The input file starts with a line ``n m t`` followed by ``m`` edge
    lines: ``i j`` when ``t`` is 1 or 3, ``i j k`` (``k`` is the weight)
    when ``t`` is 2 or 4.  The first line of the output file is the
    submitted answer.

    Returns ``(user_output, true_output, verdict)``.  Both dicts hold the
    answer under ``'n'``; the expected answer is ``-1`` when the graph on
    all ``n`` vertices is not connected.
    """
    true_output = {}
    user_output = {}

    with open(path_to_input, 'r') as f:
        n, m, t = map(int, f.readline().split())
        # NOTE(review): types 1 and 2 build a DiGraph; networkx documents its
        # spanning-tree and connectivity routines for undirected graphs, so
        # those input types presumably fail below - confirm they are unused.
        G = nx.Graph() if t in (3, 4) else nx.DiGraph()
        for _ in range(m):
            if t in (1, 3):
                i, j = map(int, f.readline().split())
                G.add_edge(i, j)
            elif t in (2, 4):
                i, j, k = map(int, f.readline().split())
                G.add_edge(i, j, weight=k)
    mst = list(tree.minimum_spanning_edges(G, algorithm="kruskal", data=True))

    # A vertex that appears in no edge never reaches G, so is_connected(G)
    # alone would report such a graph as connected; it also raises on a graph
    # without nodes.  Require all n vertices to be present before asking.
    if n <= 1:
        connected = True
    else:
        connected = G.number_of_nodes() == n and nx.is_connected(G)

    true_output['n'] = sum(x[-1]['weight'] for x in mst) if connected else -1

    with open(path_to_output, 'r') as f:
        user_output['n'] = int(f.readline())

    return user_output, true_output, true_output['n'] == user_output['n']
Пример #7
0
# Draw random endpoint pairs until m distinct edges have been collected.
while cont < m:
    # two distinct random vertices in [0, n)
    aresta = random.sample(range(0, n), 2)
    # [a, b] and [b, a] are the same edge once added to the graph, so both
    # orientations are checked (assumes g is undirected, as the spanning-tree
    # call below requires); otherwise fewer than m distinct edges could result.
    if aresta not in arestas and aresta[::-1] not in arestas:
        arestas.append(aresta)
        cont += 1

g.add_edges_from(arestas)

nx.draw(g, with_labels=True, font_weight='bold')
plt.show()

# Time the spanning-tree computation together with building its graph.
start = timer()
kruskal = tree.minimum_spanning_edges(g,
                                      algorithm='kruskal',
                                      weight='weight',
                                      keys=True,
                                      data=True,
                                      ignore_nan=False)
k = nx.Graph()
k.add_edges_from(kruskal)
k.add_nodes_from(g.nodes)
end = timer()

arq.write(f"Tempo de execução Kruskal: {end-start}s\n")

nx.draw(k, with_labels=True, font_weight='bold')
plt.show()

kArestas = k.edges()  # edges that belong to the minimum spanning tree
gArestas = [aresta for aresta in g.edges()
            if aresta not in kArestas]  # edges that are not in the tree
Пример #8
0
def creategraph(algo, data):
    """Run ``algo`` on the graph encoded in ``data`` and return a plot dict.

    ``data`` is split on blank lines (CRLF pairs) into sections that repeat
    in groups of five.  From the first group: the second section is the
    node count, the third holds one tab-separated row per node whose second
    and third fields are the x and y coordinates, and the fifth is the
    source node used by Dijkstra.  Every fourth section is passed to
    ``adjency_matrix`` together with the node count.

    ``algo`` is one of ``"Prims"``, ``"Kruskals"``, ``"Dijkstra"``,
    ``"Bellman Ford"``, ``"Floyd Warshall"`` or ``"Clustering"``.

    Returns a dict with ``'data'`` (three scatter traces: all edges, the
    highlighted edges, the nodes) and ``'layout'`` (title, axes and per-node
    annotations).

    Raises ``ValueError`` for an unknown ``algo``.
    """
    sections = data.split("\r\n\r\n")
    n_nodes = int(sections[1])
    source = int(sections[4])
    Ad = adjency_matrix(sections[3::5], n_nodes)
    # NOTE(review): nx.from_numpy_matrix was removed in networkx 3.0
    # (from_numpy_array replaces it) - confirm the pinned networkx version.
    G = nx.from_numpy_matrix(Ad)

    coord_rows = sections[2].split("\r\n")
    Xn = []
    Yn = []
    for i in range(n_nodes):
        fields = coord_rows[i].split("\t")
        Xn.append(float(fields[1]))
        Yn.append(float(fields[2]))
    labels = list(range(n_nodes))

    # Edges to emphasise, stored as frozensets so that the orientation in
    # which an algorithm reports an edge does not matter.
    highlighted = set()

    if algo in ("Prims", "Kruskals"):
        # "Kruskals" used to run Prim as well (copy-paste slip).
        method = 'prim' if algo == "Prims" else 'kruskal'
        mst_edges = list(
            tree.minimum_spanning_edges(G, algorithm=method, data=False))
        length = 0
        for u, v in mst_edges:
            length = length + Ad[u][v]
        prefix = "Prims Cost : " if algo == "Prims" else "Kruskal Cost : "
        title = prefix + str(length)
        highlighted = {frozenset(edge) for edge in mst_edges}
    elif algo == "Dijkstra":
        length, path = nx.bidirectional_dijkstra(G, source, n_nodes - 1)
        title = "Dijkstra Cost : " + str(length)
        highlighted = {frozenset(step) for step in zip(path, path[1:])}
    elif algo == "Bellman Ford":
        # NOTE(review): the source is the literal 6 here while Dijkstra uses
        # the source read from the input - confirm which one is intended.
        length, path = nx.single_source_bellman_ford(G, 6, n_nodes - 1)
        title = "Bellman Ford Cost : " + str(length)
        highlighted = {frozenset(step) for step in zip(path, path[1:])}
    elif algo == "Floyd Warshall":
        dist = nx.floyd_warshall_numpy(G)
        # overwrite the diagonal with the maximum so min() picks the smallest
        # distance between two different nodes
        np.fill_diagonal(dist, dist.max())
        title = "Floyd Warshall Cost : " + str(dist.min())
    elif algo == "Clustering":
        title = "Clustering Cost : " + str(nx.average_clustering(G))
    else:
        raise ValueError("unknown algorithm: " + repr(algo))

    # Line segments for every edge (Xe/Ye) and for the highlighted subset
    # (Xe2/Ye2); None separates consecutive segments in a single trace.
    Xe = []
    Ye = []
    Xe2 = []
    Ye2 = []
    for u, v in G.edges():
        seg_x = [Xn[u], Xn[v], None]
        seg_y = [Yn[u], Yn[v], None]
        if frozenset((u, v)) in highlighted:
            Xe2.extend(seg_x)
            Ye2.extend(seg_y)
        Xe.extend(seg_x)
        Ye.extend(seg_y)

    trace_nodes = dict(type='scatter',
                       x=Xn,
                       y=Yn,
                       mode='markers',
                       marker=dict(size=28, color='peru'),
                       text=labels,
                       hoverinfo='text')

    trace_edges = dict(type='scatter',
                       mode='lines',
                       arrowstyle='->',
                       x=Xe,
                       y=Ye,
                       line=dict(width=1, color='royalblue'),
                       hoverinfo='none')

    trace_edges2 = dict(type='scatter',
                        mode='lines',
                        x=Xe2,
                        y=Ye2,
                        line=dict(width=4.1, color="firebrick"),
                        hoverinfo='none')

    # axis line, grid and tick labels shown; no axis title
    axis = dict(showline=True,
                zeroline=False,
                showgrid=True,
                showticklabels=True,
                title='')

    layout = dict(
        title=title,
        font=dict(family='Balto'),
        showlegend=True,
        xaxis=axis,
        yaxis=axis,
    )

    # one text annotation per node, placed on the node itself
    annotations = [
        dict(text=labels[k],
             x=Xn[k],
             y=Yn[k],
             xref='x1',
             yref='y1',
             font=dict(color='rgb(10,10,10)', size=14),
             showarrow=False)
        for k in range(n_nodes)
    ]
    data = dict(data=[trace_edges, trace_edges2, trace_nodes], layout=layout)
    data['layout'].update(annotations=annotations)
    return data
Пример #9
0
# Draw the problem graph with its weights on a circular layout.
pos = nx.circular_layout(G)
labels = nx.get_edge_attributes(G, 'weight')
print(
    "----------------------------------Output MST----------------------------------"
)
print("Nodes of the Graph:", G.nodes())
print("Edges of the Graph:", G.edges())
nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)
nx.draw(G, pos, with_labels=True, node_color='y')
plt.savefig("Problem_graph2.png")  # save as png
plt.show()

# Kruskal's algorithm
print("*********************************************************************")
print("The Solution using Kruskal Algorithm is shown:")
h = nx.Graph()
final = tree.minimum_spanning_edges(G, algorithm='kruskal', data=True)
edgelist = list(final)
print(sorted(edgelist))
h.add_edges_from(sorted(edgelist))
# Label only the edges that are in the tree: passing the labels of G here
# would also print weights for edges that the tree does not contain.
nx.draw_networkx_edge_labels(h,
                             pos,
                             edge_labels=nx.get_edge_attributes(h, 'weight'))
nx.draw(h, pos, with_labels=True, node_color='b')
plt.savefig("kruskal.png")  # save as png
plt.show()

# Prim's algorithm
print("*********************************************************************")
print("The Solution using Prims Algorithm is shown:")
j = nx.Graph()
final2 = tree.minimum_spanning_edges(G, algorithm='prim', data=True)
edgelist2 = list(final2)
print(sorted(edgelist2))
j.add_edges_from(sorted(edgelist2))
Пример #10
0
from networkx.algorithms import tree
import networkx as nx
import matplotlib.pyplot as plt

# Four-node cycle; the 0-3 edge is given an explicit weight of 2.
G = nx.cycle_graph(4)
G.add_edge(0, 3, weight=2)

# Show the graph with a planar layout.
draw_options = dict(with_labels=True,
                    node_color='g',
                    node_size=800,
                    font_size=14,
                    width=0.8)
nx.draw_planar(G=G, **draw_options)
plt.show()

# Spanning-tree edges from Prim's algorithm, without attribute dicts.
mst = tree.minimum_spanning_edges(G, algorithm="prim", data=False)
edgelist = list(mst)

# Sort the endpoints inside each edge, then the edges, for a stable printout.
normalized = [sorted(edge) for edge in edgelist]
print(sorted(normalized))

# Reference:
# https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.tree.mst.minimum_spanning_edges.html
Пример #11
0
def edgeWeightedGraph(fileName="./" + SUBDIRNAME + "/ewg.png"):
    """Print the Prim spanning tree of a fixed 8-node graph and save a plot.

    Each tree edge is printed with its weight, followed by the total tree
    weight.  The whole graph is then drawn on a circular layout with weight
    labels and written to ``fileName``.
    """
    print()
    G = nx.Graph()
    G.add_edges_from([
        (0, 7, {'weight': 0.16}),
        (2, 3, {'weight': 0.17}),
        (1, 7, {'weight': 0.19}),
        (0, 2, {'weight': 0.26}),
        (5, 7, {'weight': 0.28}),
        (1, 3, {'weight': 0.29}),
        (1, 5, {'weight': 0.32}),
        (2, 7, {'weight': 0.34}),
        (4, 5, {'weight': 0.35}),
        (1, 2, {'weight': 0.36}),
        (4, 7, {'weight': 0.37}),
        (0, 4, {'weight': 0.38}),
        (6, 2, {'weight': 0.40}),
        (3, 6, {'weight': 0.52}),
        (6, 0, {'weight': 0.58}),
        (6, 4, {'weight': 0.93}),
    ])

    print('Minimum Spanning Tree')
    edgelist = list(
        tree.minimum_spanning_edges(G, algorithm='prim', data=False))

    mstWeight = 0
    print('Edge Weight List')
    for u, v in edgelist:
        # Read the weight from the graph itself: a dict keyed by (u, v)
        # tuples only matches when the algorithm reports the edge in the
        # same orientation as the dict key.
        weight = G[u][v]['weight']
        mstWeight += float(weight)
        print(str([u, v]) + '\t' + str(weight))

    print('Total MST Weight: ' + str(mstWeight))

    labels = nx.get_edge_attributes(G, 'weight')
    pos = nx.circular_layout(G)
    nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)
    nx.draw(G, pos, with_labels=True, font_weight='bold')
    print("Saving file to " + str(fileName))
    plt.savefig(fileName)
    print()
                            create_using=nx.DiGraph(),
                            nodetype=int)
# Plot the sampled subgraph and save it.
pos = nx.spring_layout(subgraph)
print(nx.info(subgraph))
draw_style = dict(node_color='#A0CBE2',
                  edge_color='#00bb5e',
                  width=1,
                  edge_cmap=plt.cm.Blues,
                  with_labels=True)
nx.draw(subgraph, pos, **draw_style)
plt.savefig("graph_sample.pdf")

#%%

# Spanning-tree edges of g (Prim); the sorted list is the cell's value.
mst = tree.minimum_spanning_edges(g, algorithm='prim', data=False)
edgelist = list(mst)
sorted(edgelist)

#%%

# Node/edge counts and degree statistics

degree_sequence = list(g.degree())
nb_nodes = len(g.nodes())
nb_arr = len(g.edges())
# second column of the (node, degree) pairs
degree_values = np.array(degree_sequence)[:, 1]
avg_degree = np.mean(degree_values)
med_degree = np.median(degree_values)
max_degree = max(degree_values)
min_degree = np.min(degree_values)
Пример #13
0
def MTRF(mTrain, mTest, ntree, perczero):
    """Score ``ntree`` randomised tree models fitted on bootstrap samples.

    For every tree a bootstrap sample of the training rows is drawn, the
    single-column and pairwise probabilities are estimated with add-one
    smoothing, and each column pair gets an edge weighted by minus its
    mutual information.  A share ``perczero`` of the ``m*(m-1)/2`` pairs
    then has its weight reset to 0 before Kruskal selects the spanning
    tree.  The tree is scored by its log-likelihood on ``mTest``.

    Both data arguments are indexed as 2-D arrays (rows, columns) whose
    entries are compared against 0 and 1.

    Returns ``(avgScore, medScore)``: mean and median of the per-tree test
    log-likelihoods, each divided by the number of test rows.
    """
    # number of trees
    k = ntree
    # number of features
    m = mTrain.shape[1]
    # size of training set
    n = mTrain.shape[0]
    # number of column pairs, and how many of them get a zeroed weight
    n_pairs = int(m*(m-1)/2)
    r = int(m*(m-1)/2*perczero)

    # ind[u] is the row of Puv that holds pair (u, u+1); pair (u, v) with
    # u < v is therefore stored at row ind[u] + v - u - 1.
    ind = np.zeros(m, dtype='int')
    id_ = 0
    for u in range(m):
        ind[u] = id_
        id_ += m - u - 1

    # size of test set
    nTest = mTest.shape[0]
    # positives in test set
    cntTest = mTest.sum(axis=0)

    # test-set score of each bootstrap tree
    score = np.zeros(k)

    for k_ in range(k):
        boot = np.random.choice(range(n), size=n, replace=True)
        # materialise the bootstrap sample once instead of once per pair
        sample = mTrain[boot]

        # number of positives per column, then MLE with add-one smoothing
        cnt = sample.sum(axis=0)
        prob = (cnt+1)/(n+2)
        # pairwise table, one row per pair, columns (00, 01, 10, 11)
        Puv = np.zeros((n_pairs, 4))

        id_ = 0
        # edge weights are minus the mutual information of each pair
        G = nx.cycle_graph(m)
        for u in range(m):
            for v in range(u+1, m):
                c00 = np.logical_and(sample[:, u] == 0, sample[:, v] == 0).sum()
                c01 = np.logical_and(sample[:, u] == 0, sample[:, v] == 1).sum()
                c10 = np.logical_and(sample[:, u] == 1, sample[:, v] == 0).sum()
                p00 = (c00+1)/(n+4)
                p01 = (c01+1)/(n+4)
                p10 = (c10+1)/(n+4)
                p11 = (n-c00-c01-c10+1)/(n+4)
                Puv[id_, 0] = p00
                Puv[id_, 1] = p01
                Puv[id_, 2] = p10
                Puv[id_, 3] = p11
                logpu = np.log(prob[u])
                logqu = np.log(1-prob[u])
                logpv = np.log(prob[v])
                logqv = np.log(1-prob[v])
                MI = p00*(np.log(p00)-logqu-logqv) + p01*(np.log(p01)-logqu-logpv) + p10*(np.log(p10)-logpu-logqv) + p11*(np.log(p11)-logpu-logpv)
                G.add_edge(u, v, weight=-MI)
                id_ += 1

        # Reset exactly r distinct edge weights to 0.  Sampling without
        # replacement is what makes the count exact: with replacement the
        # same edge can be drawn twice and fewer than r edges are affected.
        edges = list(G.edges)
        zeroset = np.random.choice(range(len(edges)),
                                   size=min(r, len(edges)),
                                   replace=False)
        for i in zeroset:
            G[edges[i][0]][edges[i][1]]['weight'] = 0

        # spanning tree over the (partly zeroed) weights
        edges = tree.minimum_spanning_edges(G, algorithm='kruskal', data=False)
        # degree of each node in the tree
        deg = np.zeros(m, dtype='int')

        # pairwise log-likelihood terms along the tree edges
        for e in edges:
            u = e[0]
            v = e[1]
            deg[u] += 1
            deg[v] += 1
            c00 = np.logical_and(mTest[:, u] == 0, mTest[:, v] == 0).sum()
            c01 = np.logical_and(mTest[:, u] == 0, mTest[:, v] == 1).sum()
            c10 = np.logical_and(mTest[:, u] == 1, mTest[:, v] == 0).sum()
            c11 = nTest - c00 - c01 - c10
            tmp = Puv[ind[u]+v-u-1]
            score[k_] += c00*np.log(tmp[0]) + c01*np.log(tmp[1]) + c10*np.log(tmp[2]) + c11*np.log(tmp[3])

        # subtract the single-column terms, each counted (degree - 1) times
        score[k_] -= np.log(prob).dot(np.multiply(cntTest, deg-1)) + np.log(1-prob).dot(np.multiply(nTest-cntTest, deg-1))

    avgScore = np.mean(score)/nTest
    medScore = np.median(score)/nTest
    return avgScore, medScore
Пример #14
0
def MTEM(mTrain, mTest, ntree):
    """Fit a mixture of ``ntree`` tree models with EM and score the test set.

    Initial responsibilities come from ``ntree`` random trees evaluated with
    probabilities estimated on the whole training set.  Each EM iteration
    then re-estimates the mixture coefficients, the weighted single-column
    and pairwise probabilities of every component, and its spanning tree
    (Kruskal on minus the mutual information).  Iteration stops after 100
    rounds or once the training score changes by at most 5e-3.

    Both data arguments are indexed as 2-D arrays (rows, columns) whose
    entries are compared against 0 and 1.

    Returns the log-likelihood of ``mTest`` under the fitted mixture,
    divided by the number of test rows.  A convergence message is printed.
    """
    # number of trees
    k = ntree

    # number of iterations
    niter = 100
    # number of features
    m = mTrain.shape[1]
    # size of training set
    n = mTrain.shape[0]

    # initialization 
    # mixture coefficient
    lam = np.ones(k)/k
    # marginals
    # prob = np.random.rand(k,m)
    prob = np.ones((k,m)) * 0.5
    # pairwise marginal table
    Puv = np.zeros((k,int(m*(m-1)/2), 4))

    # gamma[k_, i]: weight of training row i under component k_
    gamma = np.zeros((k,n))
    # gammau = np.zeros((k,n))
    
    # index correspondence: ind[u] is the row of the pair table that holds
    # pair (u, u+1), so pair (u, v) with u < v sits at row ind[u]+v-u-1
    ind = np.zeros(m, dtype = 'int')
    id_ = 0
    for u in range(m):
        ind[u] = id_
        for v in range(u+1,m):
            id_ += 1
    
    # initialization by M step
    # MLE with 1-Laplace smoothing
    prob_init = (mTrain.sum(axis = 0) +1)/(n+2)
    Puv_init = np.zeros((int(m*(m-1)/2), 4))
    
    id_ = 0            
    for u in range(m):
        for v in range(u+1,m):
            c00 = np.logical_and(mTrain[:,u]==0, mTrain[:,v]==0).sum()
            c01 = np.logical_and(mTrain[:,u]==0, mTrain[:,v]==1).sum()
            c10 = np.logical_and(mTrain[:,u]==1, mTrain[:,v]==0).sum()
            Puv_init[id_,0] = (c00+1)/(n+4)
            Puv_init[id_,1] = (c01+1)/(n+4)
            Puv_init[id_,2] = (c10+1)/(n+4)
            Puv_init[id_,3] = (n-c00-c01-c10+1)/(n+4)
            id_ += 1
            
    # one random tree per component gives the starting log-likelihoods
    for k_ in range(k):
        gamma[k_] = np.zeros(n)
        
        edges = nx.generators.trees.random_tree(m).edges
        deg = np.zeros(m, dtype='int')
    
        # training-set Log-Likelihood  
        # compute degree of each node 
        id_ = 0
        for e in edges:
            u = e[0]
            v = e[1]
            deg[u] += 1
            deg[v] += 1
            # presumably random_tree reports edges with u < v, as the row
            # lookup requires - TODO confirm
            tmp = np.log(Puv_init[ind[u]+v-u-1])
            gamma[k_] += np.logical_and(mTrain[:,u]==0, mTrain[:,v]==0)*tmp[0]
            gamma[k_] += np.logical_and(mTrain[:,u]==0, mTrain[:,v]==1)*tmp[1]
            gamma[k_] += np.logical_and(mTrain[:,u]==1, mTrain[:,v]==0)*tmp[2]
            gamma[k_] += np.logical_and(mTrain[:,u]==1, mTrain[:,v]==1)*tmp[3]
            id_ += 1
        
        # subtract the single-column terms, each counted (degree - 1) times
        gamma[k_] -= mTrain.dot(np.multiply(np.log(prob_init), deg-1)) + (1-mTrain).dot(np.multiply(np.log(1-prob_init), deg-1))
      
    # lambda_k * T_k  propto Pr(X,Z)
    # NOTE(review): gamma holds summed log-probabilities at this point;
    # np.exp may underflow to 0 when there are many features - confirm.
    gamma = np.multiply(lam.reshape(-1,1), np.exp(gamma))
    # posterior probability of the hidden variable Pr(Z | X)
    gamma = gamma / gamma.sum(axis=0)

    # positives in each column
    cntTrain = mTrain.sum(axis = 0)
           
    # edges of each tree
    edgeset = np.zeros((k,m-1,2), dtype='int')
    # degrees of each tree
    degs = np.zeros((k,m), dtype='int')
            
    it = 0
    score = 0
    dscore = 1

    while it < niter and abs(dscore) > 5e-3:
        score0 = score
               
        # E step
        # total responsibility of each component
        Gam = gamma.sum(axis=1)
        # P^k(x^i): rescale so every component's weights sum to 1
        gamma = np.divide(gamma,Gam.reshape(-1,1))
        # print(gamma)
        
        # M step
        # MLE of mixture coef
        lam = Gam/n  
        # print('iter ', it+1, ' lambda: ', lam)
        
        for k_ in range(k):
            # MLE with 1-Laplace smoothing; the weights sum to 1, so the
            # factor n turns the weighted sum back into a count out of n
            prob[k_] = (np.multiply(gamma[k_].reshape(-1,1),mTrain).sum(axis=0)*n +1)/(n+2)
        
            id_ = 0
            # Compute mutual information values I_uv
            G = nx.cycle_graph(m)
            for u in range(m):
                for v in range(u+1,m):
                    c00 = gamma[k_][np.logical_and(mTrain[:,u]==0, mTrain[:,v]==0)].sum()*n
                    c01 = gamma[k_][np.logical_and(mTrain[:,u]==0, mTrain[:,v]==1)].sum()*n
                    c10 = gamma[k_][np.logical_and(mTrain[:,u]==1, mTrain[:,v]==0)].sum()*n                
                    # c11 = gamma[k_][np.logical_and(mTrain[:,u]==1, mTrain[:,v]==1)].sum()*n
                    p00 = (c00+1)/(n+4)
                    p01 = (c01+1)/(n+4)
                    p10 = (c10+1)/(n+4)
                    # p11 = (n-c00-c01-c10+1)/(n+4)
                    # the fourth cell is the remainder, so the row sums to 1
                    p11 = 1 - p00 - p01 - p10
                    Puv[k_,id_,0] = p00
                    Puv[k_,id_,1] = p01
                    Puv[k_,id_,2] = p10
                    Puv[k_,id_,3] = p11
                    logpu = np.log(prob[k_,u])
                    logqu = np.log(1-prob[k_,u])
                    logpv = np.log(prob[k_,v])
                    logqv = np.log(1-prob[k_,v])
                    MI = p00*(np.log(p00)-logqu-logqv) + p01*(np.log(p01)-logqu-logpv) + p10*(np.log(p10)-logpu-logqv) + p11*(np.log(p11)-logpu-logpv)
                    # negated so the minimum spanning tree maximises total MI
                    G.add_edge(u, v, weight=-MI)
                    id_ += 1
            
            # ET = MWST({I_uv})
            edges = tree.minimum_spanning_edges(G, algorithm='kruskal', data=False)
            deg = np.zeros(m, dtype='int')          
            
            # from here gamma[k_] accumulates log-likelihoods again
            gamma[k_] = np.zeros(n)
        
            # training-set Log-Likelihood  
            # compute degree of each node 
            id_ = 0
            for e in edges:
                u = e[0]
                v = e[1]
                # remember the tree for the test-set pass below
                edgeset[k_,id_,0] = u
                edgeset[k_,id_,1] = v
                deg[u] += 1
                deg[v] += 1
                tmp = np.log(Puv[k_,ind[u]+v-u-1])
                gamma[k_] += np.logical_and(mTrain[:,u]==0, mTrain[:,v]==0)*tmp[0]
                gamma[k_] += np.logical_and(mTrain[:,u]==0, mTrain[:,v]==1)*tmp[1]
                gamma[k_] += np.logical_and(mTrain[:,u]==1, mTrain[:,v]==0)*tmp[2]
                gamma[k_] += np.logical_and(mTrain[:,u]==1, mTrain[:,v]==1)*tmp[3]
                id_ += 1
            
            # print(deg-1)
            degs[k_] = deg
            gamma[k_] -= mTrain.dot(np.multiply(np.log(prob[k_]), deg-1)) + (1-mTrain).dot(np.multiply(np.log(1-prob[k_]), deg-1))
            
        # print('prob',prob)
        # print('Puv',Puv[k_].sum(axis=1))
        
        # back from log space, weighted by the mixture coefficients
        gamma = np.multiply(lam.reshape(-1,1), np.exp(gamma))   
         
        # average training log-likelihood of the mixture
        score = np.log(gamma.sum(axis=0)).sum()/n
        dscore = score - score0
        it += 1
        # print(it, dscore, score, score0)
        
        # posterior probability of the hidden variable 
        gamma = gamma / gamma.sum(axis=0)

    # NOTE(review): the literal 100 mirrors niter above; a run that meets the
    # tolerance exactly on the last iteration is reported as not converged.
    if it < 100:
        print('\tEM converges in ', it, ' iteration. score = ', score)   
    else:
        print('\tEM can not converges in ', it, ' iteration. score = ', score) 

    # size of test set
    nTest = mTest.shape[0]

    # Lik[k_, i]: log-likelihood of test row i under component k_
    Lik = np.zeros((k,nTest))

    # test-set Log-Likelihood  
    for k_ in range(k):
        for e in edgeset[k_]:
            u = e[0]
            v = e[1]
            tmp = np.log(Puv[k_,ind[u]+v-u-1])
            Lik[k_] += np.logical_and(mTest[:,u]==0, mTest[:,v]==0)*tmp[0]
            Lik[k_] += np.logical_and(mTest[:,u]==0, mTest[:,v]==1)*tmp[1]
            Lik[k_] += np.logical_and(mTest[:,u]==1, mTest[:,v]==0)*tmp[2]
            Lik[k_] += np.logical_and(mTest[:,u]==1, mTest[:,v]==1)*tmp[3]
        
        Lik[k_] -= mTest.dot(np.multiply(np.log(prob[k_]), degs[k_]-1)) + (1-mTest).dot(np.multiply(np.log(1-prob[k_]), degs[k_]-1))
    # mixture likelihood per test row, then the average log-likelihood
    Lik = np.multiply(lam.reshape(-1,1), np.exp(Lik)) 
    score = np.log(Lik.sum(axis=0)).sum()/nTest
    return score
Пример #15
0
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms import tree

# step 1
# build the graph and its weighted edges
G = nx.Graph()
for u, v, w in ((0, 2, 49), (0, 4, 43), (1, 3, 31), (1, 2, 56)):
    G.add_edge(u, v, weight=w)

# minimum spanning tree edges (Kruskal), with their attribute dicts
mst = tree.minimum_spanning_edges(G, algorithm='kruskal', data=True)
print("tipo", type(mst))
edgelist = list(mst)

# complete graph on 5 nodes whose edges carry no weight yet
G_complete = nx.complete_graph(5)

GrafoDioCane = nx.Graph()

# copy the weight of every spanning-tree edge onto the complete graph
for u, v, attrs in edgelist:
    G_complete[int(u)][int(v)]['weight'] = attrs.get("weight")

# print the spanning-tree edge list in the order Kruskal yielded it
# NOTE(review): the original note announced "descending order", but no
# re-sorting happens here
print(edgelist)
Пример #16
0
from networkx.algorithms import tree
import networkx as nx  # was missing although nx is used throughout
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
sns.set()
G = nx.Graph()
sub_G = nx.Graph()
# Node table (columns 1 and 2 are used as the plot position) and edge table
# (columns 0 and 1 are the endpoints, column 2 the weight).
reader_points = pd.read_excel(
    r'E:\learning_pymc3\Thesis_Code\Prim_Algorithm\kashan\kashan_points.xlsx')
reader_cost = pd.read_excel(
    r'E:\learning_pymc3\Thesis_Code\Prim_Algorithm\kashan\kashan_cost.xlsx')
matrix_cost = np.array(reader_cost)
matrix_points = np.array(reader_points)
for i in range(matrix_points.shape[0]):
    G.add_node(i, pos=(matrix_points[i, 1], matrix_points[i, 2]))
for i in range(matrix_cost.shape[0]):
    G.add_edge(matrix_cost[i, 0], matrix_cost[i, 1], weight=matrix_cost[i, 2])
pos = nx.get_node_attributes(G, 'pos')
weight = nx.get_edge_attributes(G, 'weight')
# spanning-tree edges from Prim's algorithm, as (u, v) pairs
mst = list(tree.minimum_spanning_edges(G, algorithm='prim', data=False))
print(mst)

# sub_G: the same nodes, but only the spanning-tree edges
for i in range(matrix_points.shape[0]):
    sub_G.add_node(i, pos=(matrix_points[i, 1], matrix_points[i, 2]))
sub_G.add_edges_from(mst)

# draw the full network, then overlay the tree edges in red
nx.draw_networkx(G, pos)
#nx.draw_networkx_edge_labels(G,pos,edge_labels=weight,font_color='brown')
nx.draw_networkx_edges(G, pos, edgelist=mst, edge_color='r', width=5)
plt.savefig("Prim_network.jpg", dpi=300, bbox_inches='tight')
Пример #17
0
def TreeBN2(mTrain, mTest):
    """Fit a tree-structured model on 0/1 data and score it on a test set.

    Single-column and pairwise probabilities are estimated from ``mTrain``
    with add-one smoothing; the pairwise tables are kept in an array with
    one row per column pair.  Kruskal on the negated mutual-information
    weights selects the tree, and the test log-likelihood under that tree
    is returned, divided by the number of test rows.

    Both arguments are indexed as 2-D arrays (rows, columns) whose entries
    are compared against 0 and 1.
    """
    n_obs, n_feat = mTrain.shape[0], mTrain.shape[1]

    # Smoothed estimate of P(x_v = 1) for every column.
    p_one = (mTrain.sum(axis=0) + 1) / (n_obs + 2)

    # One row per pair (a < b), columns ordered (00, 01, 10, 11).
    # first_row[a] is the row of pair (a, a+1), so pair (a, b) is found at
    # row first_row[a] + b - a - 1.
    pair_table = np.zeros((int(n_feat*(n_feat-1)/2), 4))
    first_row = np.zeros(n_feat, dtype='int')

    # Edge weights are minus the mutual information of each pair.
    graph = nx.cycle_graph(n_feat)
    row = 0
    for a in range(n_feat):
        first_row[a] = row
        a_is0 = mTrain[:, a] == 0
        a_is1 = mTrain[:, a] == 1
        log_pa = np.log(p_one[a])
        log_qa = np.log(1 - p_one[a])
        for b in range(a + 1, n_feat):
            b_is0 = mTrain[:, b] == 0
            b_is1 = mTrain[:, b] == 1
            k00 = np.logical_and(a_is0, b_is0).sum()
            k01 = np.logical_and(a_is0, b_is1).sum()
            k10 = np.logical_and(a_is1, b_is0).sum()
            p00 = (k00 + 1) / (n_obs + 4)
            p01 = (k01 + 1) / (n_obs + 4)
            p10 = (k10 + 1) / (n_obs + 4)
            p11 = (n_obs - k00 - k01 - k10 + 1) / (n_obs + 4)
            pair_table[row] = (p00, p01, p10, p11)
            log_pb = np.log(p_one[b])
            log_qb = np.log(1 - p_one[b])
            mi = (p00 * (np.log(p00) - log_qa - log_qb)
                  + p01 * (np.log(p01) - log_qa - log_pb)
                  + p10 * (np.log(p10) - log_pa - log_qb)
                  + p11 * (np.log(p11) - log_pa - log_pb))
            graph.add_edge(a, b, weight=-mi)
            row += 1

    tree_edges = tree.minimum_spanning_edges(graph, algorithm='kruskal', data=False)

    # Test-set log-likelihood: pairwise terms along the tree edges ...
    nTest = mTest.shape[0]
    score = 0
    deg = np.zeros(n_feat, dtype='int')
    for edge in tree_edges:
        a, b = edge[0], edge[1]
        deg[a] += 1
        deg[b] += 1
        a_is0 = mTest[:, a] == 0
        a_is1 = mTest[:, a] == 1
        b_is0 = mTest[:, b] == 0
        b_is1 = mTest[:, b] == 1
        t00 = np.logical_and(a_is0, b_is0).sum()
        t01 = np.logical_and(a_is0, b_is1).sum()
        t10 = np.logical_and(a_is1, b_is0).sum()
        t11 = nTest - t00 - t01 - t10
        table = pair_table[first_row[a] + b - a - 1]
        score += (t00 * np.log(table[0]) + t01 * np.log(table[1])
                  + t10 * np.log(table[2]) + t11 * np.log(table[3]))

    # ... minus the single-column terms, each counted (degree - 1) times.
    ones = mTest.sum(axis=0)
    excess = deg - 1
    score -= (np.log(p_one).dot(np.multiply(ones, excess))
              + np.log(1 - p_one).dot(np.multiply(nTest - ones, excess)))

    return score / nTest