Пример #1
0
 def test_from_adjacency_named(self):
     """Round-trip a labelled adjacency frame whose columns were reordered.

     Example taken from issue #3105.
     """
     adjacency = {
         "A": {"A": 0, "B": 0, "C": 0},
         "B": {"A": 1, "B": 0, "C": 0},
         "C": {"A": 0, "B": 1, "C": 0},
     }
     expected = pd.DataFrame(adjacency)
     # Same data, but with the columns in a different order than the index.
     shuffled = expected[["A", "C", "B"]]
     digraph = nx.from_pandas_adjacency(shuffled, create_using=nx.DiGraph())
     roundtripped = nx.to_pandas_adjacency(digraph, dtype=int)
     pd.testing.assert_frame_equal(roundtripped, expected)
Пример #2
0
 def test_roundtrip(self):
     """Check that graph -> DataFrame -> graph is lossless for both formats."""
     # Edge-list round trip (includes a self-loop on node 1).
     expected = nx.Graph([(1, 1), (1, 2)])
     edge_frame = nx.to_pandas_edgelist(expected)
     rebuilt = nx.from_pandas_edgelist(edge_frame)
     assert_graphs_equal(expected, rebuilt)

     # Adjacency round trip with explicit unit weights.
     weighted = {
         1: {1: {'weight': 1}, 2: {'weight': 1}},
         2: {1: {'weight': 1}},
     }
     expected = nx.Graph(weighted)
     adjacency_frame = nx.to_pandas_adjacency(expected, dtype=int)
     rebuilt = nx.from_pandas_adjacency(adjacency_frame)
     assert_graphs_equal(expected, rebuilt)
Пример #3
0
def ReadGraph(adress):
    """Read a header-less CSV adjacency matrix and return it as a graph.

    ``adress`` is passed straight to ``pd.read_csv``; the resulting frame is
    handed to ``nx.from_pandas_adjacency`` with its default graph type.
    """
    adjacency = pd.read_csv(adress, header=None)
    return nx.from_pandas_adjacency(adjacency)
Пример #4
0
import numpy as np
from src.hierarchy import signal_flow
from graspy.models import SBMEstimator

# Signal-flow value for every node, computed from the adjacency matrix `adj`.
node_signal_flow = signal_flow(adj)
# Mean signal flow of each predicted cluster, stored at the cluster's label.
# NOTE(review): assumes the labels in `pred_labels` are integers in range(k),
# since they are used directly as indices into `mean_sf` - confirm.
mean_sf = np.zeros(k)
for i in np.unique(pred_labels):
    inds = np.where(pred_labels == i)[0]
    mean_sf[i] = np.mean(node_signal_flow[inds])

# First column of the fitted mixture model's component means.
cluster_mean_latent = gmm.model_.means_[:, 0]
# Block probability matrix of an SBM fitted to `bin_adj` with the predicted
# labels, wrapped in a k-by-k frame so it can be read as a weighted digraph.
block_probs = SBMEstimator().fit(bin_adj, y=pred_labels).block_p_
block_prob_df = pd.DataFrame(data=block_probs,
                             index=range(k),
                             columns=range(k))
block_g = nx.from_pandas_adjacency(block_prob_df, create_using=nx.DiGraph)
plt.figure(figsize=(10, 10))
# Position block i at (cluster_mean_latent[i], mean_sf[i]).
pos = dict(zip(range(k), zip(cluster_mean_latent, mean_sf)))
# nx.draw_networkx_nodes(block_g, pos=pos)
labels = nx.get_edge_attributes(block_g, "weight")

# nx.draw_networkx_edge_labels(block_g, pos, edge_labels=labels)

# Log-scaled colour normalisation over the fixed range [0.01, 0.1].
norm = mpl.colors.LogNorm(vmin=0.01, vmax=0.1)

sm = ScalarMappable(cmap="Reds", norm=norm)
# One RGBA colour per edge weight; the weights are offset by 0.01 before mapping.
cmap = sm.to_rgba(np.array(list(labels.values())) + 0.01)
nx.draw_networkx(
    block_g,
    pos,
Пример #5
0
def _read_h5_graph(filename, key):
    """Read the adjacency table stored under ``key`` and rebuild it as a graph.

    Stored values are shifted by +1 and missing (NaN) entries are filled with
    0 before the graph is built; the shift is undone on every edge weight
    afterwards.
    """
    adjacency = (pd.read_hdf(filename, key) + 1).fillna(0)
    graph = nx.from_pandas_adjacency(adjacency)
    for _, _, attrs in graph.edges(data=True):
        attrs['weight'] = attrs['weight'] - 1
    return graph


def read_h5(filename: str = 'dandelion_data.h5') -> Dandelion:
    """
    Read in and returns a `Dandelion` class from .h5 format.

    Only the `data` table is mandatory.  `metadata`, `edges`, `graph` and
    `layout` are passed to the constructor only when they can be read;
    `germline` and `distance` are always passed (possibly empty).  The
    `threshold` attribute of the result is set to the stored value, or to
    `None` when it cannot be read.

    Parameters
    ----------
    filename : str
        path to `.h5` file

    Returns
    -------
    `Dandelion` object.

    Raises
    ------
    AttributeError
        If the `data` table cannot be read or sanitized.
    """
    try:
        data = pd.read_hdf(filename, 'data')
        data = sanitize_data(data)

        if check_mix_dtype(data):
            for x in return_mix_dtype(data):
                # Assign back instead of a chained inplace replace, which is
                # not guaranteed to modify `data` itself.
                data[x] = data[x].replace('', pd.NA)
            data = sanitize_data(data)
    except Exception as err:
        raise AttributeError(
            '{} does not contain attribute `data`'.format(filename)) from err

    constructor = {'data': data}

    # Everything below is optional: a slot that is missing or unreadable is
    # deliberately skipped rather than treated as an error.
    try:
        constructor['metadata'] = pd.read_hdf(filename, 'metadata')
    except Exception:
        pass

    try:
        constructor['edges'] = pd.read_hdf(filename, 'edges')
    except Exception:
        pass

    try:
        # Both graphs must load; otherwise no graph is passed at all.
        constructor['graph'] = (_read_h5_graph(filename, 'graph/graph_0'),
                                _read_h5_graph(filename, 'graph/graph_1'))
    except Exception:
        pass

    with h5py.File(filename, 'r') as hf:
        try:
            layouts = []
            for name in ('layout/layout_0', 'layout/layout_1'):
                attrs = hf[name].attrs
                layouts.append({k: np.array(attrs[k]) for k in attrs.keys()})
            constructor['layout'] = tuple(layouts)
        except Exception:
            pass

        germline = {}
        try:
            for g in hf['germline'].attrs:
                germline[g] = hf['germline'].attrs[g]
        except Exception:
            pass

        distance = Tree()
        try:
            for d in hf['distance'].keys():
                d_ = pd.read_hdf(filename, 'distance/' + d)
                distance[d] = scipy.sparse.csr_matrix(d_.values)
        except Exception:
            pass

        try:
            # Built-in float: the `np.float` alias no longer exists in
            # current NumPy, which made this silently fall back to None.
            threshold = float(np.array(hf['threshold']))
        except Exception:
            threshold = None

    constructor['germline'] = germline
    constructor['distance'] = distance

    try:
        res = Dandelion(**constructor)
    except Exception:
        # Retry without initialization when the regular construction fails.
        res = Dandelion(**constructor, initialize=False)

    res.threshold = threshold
    return res
Пример #6
0
def ReadDiGraph(adress):
    """Read a header-less CSV adjacency matrix and return it as a directed graph.

    ``adress`` is passed straight to ``pd.read_csv``; the frame is then
    loaded into a fresh ``nx.DiGraph``.
    """
    adjacency = pd.read_csv(adress, header=None)
    return nx.from_pandas_adjacency(adjacency, create_using=nx.DiGraph())
Пример #7
0
            size=15,
            line_width=2))

    node_trace.text = list(pos.keys())
    traces = edges_list + [node_trace]

    layout = go.Layout(
        title=title,
        # titlefont_size=16,
        # template='plotly_white',
        font=dict(color="white"),
        paper_bgcolor="#2c2f38",
        plot_bgcolor='#2c2f38',
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))

    return dict(data=traces, layout=layout)


# Result of get_network_data(); it is read below as an adjacency table.
distance = get_network_data()


# Graph built once at import time from `distance`; get_nplot() reuses it.
complete_graph = nx.from_pandas_adjacency(distance)


def get_nplot(cluster, title):
    """Return a ``go.Figure`` for ``cluster`` drawn against ``complete_graph``.

    The figure contents come from ``network_plot`` called with the cluster,
    the module-level graph and the title.
    """
    figure_spec = network_plot(cluster, complete_graph, title)
    return go.Figure(figure_spec)
Пример #8
0
                    if '0229' not in dm and year not in ('2019', '2018',
                                                         '1861', '1862'):
                        if year not in year_data:
                            year_data[year] = {}
                        year_data[year][dm] = int(new_line[3])

    df = pd.DataFrame(year_data)
    df['mmdd'] = df.index
    df.reset_index(drop=True, inplace=True)
    return df


if __name__ == "__main__":

    # NOTE(review): `df` is not assigned in the visible part of this script;
    # presumably it is the frame returned by the loader above - confirm.
    # Pairwise Pearson correlation between all columns except 'mmdd'.
    df_cor = df.drop(columns=['mmdd']).corr(method='pearson', min_periods=1)
    # Graph built from the full correlation matrix (default graph type).
    network_df = nx.from_pandas_adjacency(df_cor, create_using=None)
    # nx.draw_spring(network_df)
    # plt.show()

    # NOTE(review): thresh_vals is not used in the lines visible here; the
    # cut-off applied below is the literal 0.4.
    thresh_vals = [0.3, 0.4, 0.5, 0.6, 0.65, 0.7, 0.75]
    # Zero out every correlation below 0.4 on a copy of the matrix.
    df_cor_clean = df_cor.copy()
    df_cor_clean[df_cor_clean < 0.4] = 0

    # Rebuild the graph from the thresholded matrix, replacing the first one.
    network_df = nx.from_pandas_adjacency(df_cor_clean, create_using=None)
    # pos = nx.spring_layout(network_df,scale=2)

    # nx.draw(G,pos,font_size=8)
    # plt.show()

    # G = nx.path_graph(4)
    pos = nx.spring_layout(network_df)
Пример #9
0
    # plot heatmap
    ax = sns.heatmap(R_squared, xticklabels=1, yticklabels=1)
    plt.show()

    # plot correlation graph, assuming > 0.3 signifies an existing correlation
    corr_graph_vals = R_squared[R_squared > 0.3].fillna(0)
    corr_graph_vals = corr_graph_vals.round(3)

    # remove diagonal entries, and remove variables with no strong correlation
    cols = corr_graph_vals.columns
    for c in cols:
        corr_graph_vals[c][c] = 0
    var_mask = (corr_graph_vals.T != 0).any()
    corr_graph_vals = corr_graph_vals[var_mask]
    corr_graph_vals = corr_graph_vals.T[var_mask].T
    corr_graph = nx.from_pandas_adjacency(corr_graph_vals)

    pos = nx.spring_layout(corr_graph)
    nx.draw_networkx(corr_graph, pos)
    weights = nx.get_edge_attributes(corr_graph, 'weight')
    nx.draw_networkx_edge_labels(corr_graph, pos, edge_labels=weights)
    plt.show()

    # print correlated variables to SalePrice
    pd.set_option('display.max_rows', len(corr_matrix))
    print("R^2:", R_squared.sort_values(by=['SalePrice'], ascending='True'))

    # step: Choosen highest R^2 variables with respect to Y (SalePrice)
    # Note, these are chosen without first transforming the variables
    primary_vars = [
        'SalePrice', 'OverallQual', 'GrLivArea', 'GarageCars', 'GarageArea'
Пример #10
0
        def make_plot(self):
            """Build the node-link plot for the enclosing adjacency frame ``df``.

            The graph is directed or undirected according to
            ``get_directed(self.sid)``.  Node positions come from the layout
            selected by the enclosing ``diagramType`` ("FORCE", "HIERARCHICAL"
            or "RADIAL").  Every node that received a position is annotated
            with its in/out/total degree, its in/out/total edge weight and a
            marker size for each of those six metrics.

            Returns
            -------
            tuple
                ``(plot, points)``: the graph element created by
                ``HVGraph.from_networkx`` (with its own node glyphs hidden)
                and its styled ``plot.nodes`` element.

            Raises
            ------
            ValueError
                If ``diagramType`` is not one of the supported layout names.
            """
            from graphion.session.handler import get_directed  # dependency cycle fix

            directed = get_directed(self.sid)
            G = from_pandas_adjacency(df,
                                      create_using=DiGraph if directed else Graph)
            self.nodeCount = number_of_nodes(G)

            # Create NetworkX graph layout manager
            if diagramType == "FORCE":
                layout = spring_layout(G,
                                       k=10.42 / sqrt(self.nodeCount),
                                       seed=server.config['SEED'])
            elif diagramType == "HIERARCHICAL":
                if self.nodeCount > 1:
                    layout = graphviz_layout(Graph(list(G.edges(data=True))),
                                             prog='dot')
                else:
                    # graphviz_layout does not work with one node, just
                    # display a "circular_layout"
                    layout = circular_layout(G)
            elif diagramType == "RADIAL":
                layout = circular_layout(G)
            else:
                # Fail with a clear message instead of a NameError on `layout`.
                raise ValueError(
                    "Unsupported diagram type: {!r}".format(diagramType))

            # Only nodes that received a position are annotated below.
            nodes = sorted(layout)

            # calculate centrality
            centrality = degree_centrality(G)

            minNodeSize = 0.1  # minNodeSize * maxNodeSize = minimum node size
            maxNodeSize = 30

            def relative_sizes(metric):
                """Map each node's raw metric to a size factor.

                All-equal metrics give every node factor 1.  Otherwise the
                factor is ``(value - min) / max``, floored at ``minNodeSize``.
                """
                values = [metric[n] for n in nodes]
                lo, hi = min(values), max(values)
                if lo == hi:
                    return {n: 1 for n in nodes}
                if hi == 0:
                    # Only reachable with negative weights; a negative maximum
                    # already yields the floor for every node, so use the
                    # floor here too instead of dividing by zero.
                    return {n: minNodeSize for n in nodes}
                return {n: max(minNodeSize, (metric[n] - lo) / hi)
                        for n in nodes}

            if self.nodeCount > 1:
                # get degree and weight information
                if is_directed(G):
                    inDegree = dict(G.in_degree)
                    outDegree = dict(G.out_degree)
                    totalDegree = {n: inDegree[n] + outDegree[n]
                                   for n in nodes}
                    inWeight = dict(G.in_degree(weight='weight'))
                    outWeight = dict(G.out_degree(weight='weight'))
                    totalWeight = {n: inWeight[n] + outWeight[n]
                                   for n in nodes}
                else:
                    # Undirected: in, out and total are all the plain degree.
                    inDegree = outDegree = totalDegree = dict(G.degree)
                    inWeight = outWeight = totalWeight = dict(
                        G.degree(weight='weight'))

                # Scale every metric so that the node sizes don't go bananas
                inDegreeSize = relative_sizes(inDegree)
                outDegreeSize = relative_sizes(outDegree)
                totalDegreeSize = relative_sizes(totalDegree)
                inWeightSize = relative_sizes(inWeight)
                outWeightSize = relative_sizes(outWeight)
                totalWeightSize = relative_sizes(totalWeight)

                attributes = {
                    n: {
                        'indegreesize': inDegreeSize[n] * maxNodeSize,
                        'outdegreesize': outDegreeSize[n] * maxNodeSize,
                        'totaldegreesize': totalDegreeSize[n] * maxNodeSize,
                        'inweightsize': inWeightSize[n] * maxNodeSize,
                        'outweightsize': outWeightSize[n] * maxNodeSize,
                        'totalweightsize': totalWeightSize[n] * maxNodeSize,
                        'indegree': inDegree[n],
                        'outdegree': outDegree[n],
                        'totaldegree': totalDegree[n],
                        'inweight': inWeight[n],
                        'outweight': outWeight[n],
                        'totalweight': totalWeight[n],
                        'count': 0
                    }
                    for n in nodes
                }
            else:
                # At most one node: fixed sizes and zeroed metrics.
                attributes = {
                    n: {
                        'indegreesize': 1,
                        'outdegreesize': 1,
                        'totaldegreesize': 1,
                        'inweightsize': 1,
                        'outweightsize': 1,
                        'totalweightsize': 1,
                        'indegree': 0,
                        'outdegree': 0,
                        'totaldegree': 0,
                        'inweight': 0,
                        'outweight': 0,
                        'totalweight': 0,
                        'count': 0
                    }
                    for n in nodes
                }

            set_node_attributes(G, attributes)
            plot = HVGraph.from_networkx(G, layout).opts(
                directed=directed, arrowhead_length=0.01)

            # Restrict the hover tooltip to these fields instead of showing
            # every node attribute.
            tooltips = [('Index', '@index'), ('In-Degree', '@indegree'),
                        ('Out-Degree', '@outdegree'),
                        ('Total Degree', '@totaldegree'),
                        ('In Edge Weight', '@inweight'),
                        ('Out Edge-Weight', '@outweight'),
                        ('Total Edge-Weight', '@totalweight')]
            hover = HoverTool(tooltips=tooltips)

            # Make custom dictionary with color palettes
            named_palettes = {
                'cividis': Cividis256,
                'viridis': Viridis256,
                'inferno': Inferno256,
            }
            for c in self.colorList:
                if c in named_palettes:
                    self.colorMap[c] = named_palettes[c]
                else:
                    self.colorMap[c] = palette[c]

            # Bundle edges only when requested and the graph has connected nodes.
            if (max(centrality.values()) > 0 and datashaded
                    and self.nodeCount > 1):
                plot = bundle_graph(plot)
            points = plot.nodes
            points.opts(cmap=self.colorMap[self.color_palette],
                        color=self.node_color,
                        size=self.node_size,
                        tools=['box_select', 'lasso_select', 'tap', hover],
                        active_tools=['wheel_zoom'],
                        toolbar='above',
                        show_legend=False,
                        width=self.size,
                        height=self.size)

            # Hide the graph's own node glyphs; `points` is styled separately.
            plot.opts(node_size=0,
                      node_color=None,
                      node_line_width=0,
                      node_hover_fill_color='green')
            return plot, points
Пример #11
0
def generate3DDiagram(file, sid, df=False):
    """Render adjacency data as a 3-D force-directed Plotly diagram.

    Parameters
    ----------
    file : object
        When ``df`` is falsy, the input handed to ``decreaseDiagramSize``;
        when ``df`` is truthy, an adjacency DataFrame that is used as-is.
    sid : object
        Session identifier passed to ``calculate_plot_size``.
    df : bool, optional
        Whether ``file`` already is the adjacency DataFrame.

    Returns
    -------
    The ``Plotly`` element wrapping the generated figure.
    """
    adjacency = file if df else decreaseDiagramSize(file)

    names = adjacency.columns.tolist()
    N = len(names)

    G = convert_node_labels_to_integers(from_pandas_adjacency(adjacency))
    # 3d spring layout
    pos = spring_layout(G, dim=3)
    # numpy array of x,y,z positions in sorted node order
    layt = array([pos[v] for v in sorted(G)])
    # scalar colors
    scalars = array(list(G.nodes())) + 5

    # Only edges with a positive weight are drawn; their weights are
    # normalised by the largest one.
    drawn_edges = [(int(u), int(v), d['weight'])
                   for (u, v, d) in G.edges(data=True) if d['weight'] > 0]
    maximum = max((w for _, _, w in drawn_edges), default=0)

    def make_edge(x, y, z, weight):
        """Return the line trace for one edge, shaded and sized by ``weight``."""
        return Scatter3d(
            x=x,
            y=y,
            z=z,
            # Scale before truncating: int(weight) * 180 collapsed every
            # normalised weight below 1 to black.
            line=dict(color='rgb(' + str(int(weight * 180)) + ', 0, 0)',
                      width=(weight * 3)**2),
            hoverinfo='none',
            mode='lines')

    Xn = [layt[k][0] for k in range(N)]  # x-coordinates of nodes
    Yn = [layt[k][1] for k in range(N)]  # y-coordinates
    Zn = [layt[k][2] for k in range(N)]  # z-coordinates

    edge_traces = []
    for u, v, w in drawn_edges:
        # The trailing None terminates each coordinate list.
        x_edge_ends = [layt[u][0], layt[v][0], None]
        y_edge_ends = [layt[u][1], layt[v][1], None]
        z_edge_ends = [layt[u][2], layt[v][2], None]
        edge_traces.append(
            make_edge(x_edge_ends, y_edge_ends, z_edge_ends, w / maximum))

    trace2 = Scatter3d(x=Xn,
                       y=Yn,
                       z=Zn,
                       mode='markers',
                       marker=dict(symbol='circle',
                                   size=6,
                                   color=scalars,
                                   colorscale='Viridis',
                                   line=dict(color='rgb(50,50,50)',
                                             width=0.5)),
                       text=names,
                       hoverinfo='text')

    axis = dict(showbackground=False,
                showline=False,
                zeroline=False,
                showgrid=False,
                showticklabels=False,
                title='')

    # Imported locally, as in the original code.
    from graphion.session.handler import calculate_plot_size
    psize = calculate_plot_size(sid)

    layout = Layout(title="Force-directed layout",
                    width=psize,
                    height=psize,
                    showlegend=False,
                    scene=dict(
                        xaxis=dict(axis),
                        yaxis=dict(axis),
                        zaxis=dict(axis),
                    ),
                    margin=dict(t=100),
                    hovermode='closest',
                    paper_bgcolor='rgba(0,0,0,0)',
                    plot_bgcolor='rgba(0,0,0,0)')

    data = [trace2] + edge_traces
    fig = Figure(data=data, layout=layout)
    extension('plotly')
    painful = Plotly(fig)
    return painful
Пример #12
0
import pandas as pd
import networkx as nx
# Adjacency frame and graph for each phase, keyed by phase number (1..11).
phases = {}
G = {}
for i in range(1, 12):
    var_name = "phase" + str(i)
    file_name = "https://raw.githubusercontent.com/ragini30/Networks-Homework/main/" + var_name + ".csv"

    # file_name = "../data/CAVIAR/" + var_name + ".csv"
    phases[i] = pd.read_csv(file_name, index_col=["players"])
    # Prefix the node labels with "n" and make the index match the columns.
    phases[i].columns = "n" + phases[i].columns
    phases[i].index = phases[i].columns
    # The graph type has to be passed via create_using; the original line
    # had a stray `nx.DiGraph()` after the call, which is a syntax error.
    G[i] = nx.from_pandas_adjacency(phases[i], create_using=nx.DiGraph())
    G[i].name = var_name


# The bare list expressions below only display a value when run
# interactively (e.g. in a notebook cell).
dc9 = nx.degree_centrality(G[9])
skeys = ['n1', 'n3', 'n12', 'n83']
[dc9[k] for k in skeys]

bc3 = nx.betweenness_centrality(G[3], normalized=True)
[bc3[k] for k in skeys]
bc9 = nx.betweenness_centrality(G[9], normalized=True)
[bc9[k] for k in skeys]

ec3 = nx.eigenvector_centrality(G[3])
[ec3[k] for k in skeys]

ec9 = nx.eigenvector_centrality(G[9])
[ec9[k] for k in skeys]
Пример #13
0
def _adj_to_edge(df):
    """Turn an adjacency-matrix DataFrame into an edge-list DataFrame.

    The frame is loaded into a graph and exported again as an edge list.
    """
    graph = nx.from_pandas_adjacency(df)
    return nx.to_pandas_edgelist(graph)
Пример #14
0
"""


import networkx as nx
#import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt


# CSV files to convert; each one is binarised and pickled as a graph.
blocks = ['Salt Data//salt-block4.csv']

for x in blocks:
    print(x)
    block = pd.read_csv(x).drop(['Unnamed: 0'], axis=1)

    # Binarise column by column: values below .3 become 0, the rest become 1.
    for column in block.columns:
        below = block[column] < .3
        block.loc[below, column] = 0
        at_or_above = block[column] >= .3
        block.loc[at_or_above, column] = 1

    print("done with for loop")
    # Use plain 0..n-1 labels on both axes before building the graph.
    block = block.reset_index(drop=True)
    block.columns = range(block.shape[1])
    G = nx.from_pandas_adjacency(block)
    print(nx.info(G))
    # Swap the last four characters of the path (".csv") for ".p".
    pickle_path = str(x)[:-4] + '.p'
    nx.write_gpickle(G, pickle_path)


#G.remove_nodes_from(list(nx.isolates(G)))
#g = nx.draw(G, node_size=1, width = .01)
 
Пример #15
0
# Cut the flat `distance` sequence into consecutive rows of 38 entries.
chunks = [distance[start:start + 38] for start in range(0, len(distance), 38)]

# Turn the rows into a distance matrix labelled "city 1" .. "city N".
cities = [f'city {number}' for number in range(1, len(data) + 1)]
distances = pd.DataFrame(chunks, columns=cities, index=cities)

# Question a

# Drawing position of each node: (longitude, latitude) keyed by city label.
xypos = {
    'city ' + str(data['city'][row]): (data['longitude'][row],
                                       data['latitude'][row])
    for row in range(len(data))
}

G = nx.from_pandas_adjacency(distances)
nx.draw_networkx_nodes(G, pos=xypos)
nx.draw_networkx_labels(G, pos=xypos)


#Question c
#Definining several functions to use later in a while loop
def saveresult(draw=True):
    """
    To be used after optimizing a Gurobi model.
    Saves the result of a Gurobi Optimizer solution as a Graph, returning NetworkX Graph Object.
    Use draw=True argument to draw the graph.
    """

    vals = m.getAttr('x', vars)
    res=[]
Пример #16
0
def to_networkx_graph(data, create_using=None, multigraph_input=False):
    """Make a NetworkX graph from a known data structure.

    The preferred way to call this is automatically
    from the class constructor

    >>> d = {0: {1: {'weight':1}}} # dict-of-dicts single edge (0,1)
    >>> G = nx.Graph(d)

    instead of the equivalent

    >>> G = nx.from_dict_of_dicts(d)

    Parameters
    ----------
    data : object to be converted

        Current known types are:
         any NetworkX graph
         dict-of-dicts
         dict-of-lists
         list of edges
         Pandas DataFrame (square: adjacency matrix, otherwise row per edge)
         numpy matrix
         numpy ndarray
         scipy sparse matrix
         pygraphviz agraph

    create_using : NetworkX graph constructor, optional (default=nx.Graph)
        Graph type to create. If graph instance, then cleared before populated.

    multigraph_input : bool (default False)
        If True and  data is a dict_of_dicts,
        try to create a multigraph assuming dict_of_dict_of_lists.
        If data and create_using are both multigraphs then create
        a multigraph from a multigraph.

    Raises
    ------
    NetworkXError
        If ``data`` looks like a supported type but cannot be converted, or
        is not a known type at all.
    TypeError
        If ``data`` is a dict that is neither a dict-of-dicts nor a
        dict-of-lists.

    """
    # Every handler below catches ``Exception`` rather than using a bare
    # ``except`` so that KeyboardInterrupt/SystemExit propagate, and chains
    # the original error so the real cause stays visible in the traceback.

    # NX graph
    if hasattr(data, "adj"):
        try:
            result = from_dict_of_dicts(data.adj,
                                        create_using=create_using,
                                        multigraph_input=data.is_multigraph())
            if hasattr(data, 'graph'):  # data.graph should be dict-like
                result.graph.update(data.graph)
            if hasattr(data, 'nodes'):  # data.nodes should be dict-like
                result._node.update((n, dd.copy()) for n, dd in data.nodes.items())
            return result
        except Exception as err:
            raise nx.NetworkXError(
                "Input is not a correct NetworkX graph.") from err

    # pygraphviz  agraph
    if hasattr(data, "is_strict"):
        try:
            return nx.nx_agraph.from_agraph(data, create_using=create_using)
        except Exception as err:
            raise nx.NetworkXError(
                "Input is not a correct pygraphviz graph.") from err

    # dict of dicts/lists
    if isinstance(data, dict):
        try:
            return from_dict_of_dicts(data, create_using=create_using,
                                      multigraph_input=multigraph_input)
        except Exception:
            # Not a dict-of-dicts; fall back to dict-of-lists.
            try:
                return from_dict_of_lists(data, create_using=create_using)
            except Exception as err:
                raise TypeError("Input is not known type.") from err

    # list or generator of edges

    if (isinstance(data, (list, tuple)) or
            any(hasattr(data, attr) for attr in ['_adjdict', 'next', '__next__'])):
        try:
            return from_edgelist(data, create_using=create_using)
        except Exception as err:
            raise nx.NetworkXError("Input is not a valid edge list") from err

    # Pandas DataFrame
    try:
        import pandas as pd
        if isinstance(data, pd.DataFrame):
            # A square frame is read as an adjacency matrix, anything else
            # as an edge list with one row per edge.
            if data.shape[0] == data.shape[1]:
                try:
                    return nx.from_pandas_adjacency(data, create_using=create_using)
                except Exception as err:
                    msg = "Input is not a correct Pandas DataFrame adjacency matrix."
                    raise nx.NetworkXError(msg) from err
            else:
                try:
                    return nx.from_pandas_edgelist(data, edge_attr=True, create_using=create_using)
                except Exception as err:
                    msg = "Input is not a correct Pandas DataFrame edge-list."
                    raise nx.NetworkXError(msg) from err
    except ImportError:
        msg = 'pandas not found, skipping conversion test.'
        warnings.warn(msg, ImportWarning)

    # numpy matrix or ndarray
    try:
        import numpy
        if isinstance(data, (numpy.matrix, numpy.ndarray)):
            try:
                return nx.from_numpy_matrix(data, create_using=create_using)
            except Exception as err:
                raise nx.NetworkXError(
                    "Input is not a correct numpy matrix or array.") from err
    except ImportError:
        warnings.warn('numpy not found, skipping conversion test.',
                      ImportWarning)

    # scipy sparse matrix - any format
    try:
        import scipy
        if hasattr(data, "format"):
            try:
                return nx.from_scipy_sparse_matrix(data, create_using=create_using)
            except Exception as err:
                raise nx.NetworkXError(
                    "Input is not a correct scipy sparse matrix type.") from err
    except ImportError:
        warnings.warn('scipy not found, skipping conversion test.',
                      ImportWarning)

    raise nx.NetworkXError(
        "Input is not a known data type for conversion.")
def initialize_with_mobility(filename):
    """Build a directed graph from a mobility CSV file.

    The file is read with pandas, its ``ADM`` column becomes the index, and
    the resulting frame is handed to ``nx.from_pandas_adjacency`` with
    ``nx.DiGraph`` as the graph type.
    """
    logger.info(f"Reading in mobility from {filename}")
    adjacency = pd.read_csv(filename).set_index("ADM")
    return nx.from_pandas_adjacency(adjacency, nx.DiGraph)
Пример #18
0
def to_networkx_graph(data, create_using=None, multigraph_input=False):
    """Make a NetworkX graph from a known data structure.

    The preferred way to call this is automatically
    from the class constructor

    >>> d = {0: {1: {'weight':1}}} # dict-of-dicts single edge (0,1)
    >>> G = nx.Graph(d)

    instead of the equivalent

    >>> G = nx.from_dict_of_dicts(d)

    Parameters
    ----------
    data : object to be converted

       Current known types are:
         any NetworkX graph
         dict-of-dicts
         dict-of-lists
         list of edges
         Pandas DataFrame (square: adjacency matrix, otherwise row per edge)
         numpy matrix
         numpy ndarray
         scipy sparse matrix
         pygraphviz agraph

    create_using : NetworkX graph
       Use specified graph for result.  Otherwise a new graph is created.

    multigraph_input : bool (default False)
      If True and  data is a dict_of_dicts,
      try to create a multigraph assuming dict_of_dict_of_lists.
      If data and create_using are both multigraphs then create
      a multigraph from a multigraph.

    Raises
    ------
    nx.NetworkXError
        If `data` looks like one of the known types but the conversion
        fails, or if `data` matches none of the known types.
    TypeError
        If `data` is a dict that converts neither as a dict-of-dicts nor
        as a dict-of-lists.

    """
    # Handlers below catch ``Exception`` rather than using a bare ``except`` so
    # that KeyboardInterrupt/SystemExit propagate, and chain the original
    # error so the root cause stays visible in the traceback.

    # NX graph
    if hasattr(data, "adj"):
        try:
            result = from_dict_of_dicts(data.adj,
                                        create_using=create_using,
                                        multigraph_input=data.is_multigraph())
            if hasattr(data, 'graph'):  # data.graph should be dict-like
                result.graph.update(data.graph)
            if hasattr(data, 'nodes'):  # data.nodes should be dict-like
                result._node.update((n, dd.copy()) for n, dd in data.nodes.items())
            return result
        except Exception as e:
            raise nx.NetworkXError(
                "Input is not a correct NetworkX graph.") from e

    # pygraphviz  agraph
    if hasattr(data, "is_strict"):
        try:
            return nx.nx_agraph.from_agraph(data, create_using=create_using)
        except Exception as e:
            raise nx.NetworkXError(
                "Input is not a correct pygraphviz graph.") from e

    # dict of dicts/lists
    if isinstance(data, dict):
        try:
            return from_dict_of_dicts(data, create_using=create_using,
                                      multigraph_input=multigraph_input)
        except Exception:
            # Not a dict-of-dicts; fall back to interpreting it as dict-of-lists.
            try:
                return from_dict_of_lists(data, create_using=create_using)
            except Exception as e:
                raise TypeError("Input is not known type.") from e

    # list or generator of edges

    if (isinstance(data, (list, tuple)) or
            any(hasattr(data, attr) for attr in ['_adjdict', 'next', '__next__'])):
        try:
            return from_edgelist(data, create_using=create_using)
        except Exception as e:
            raise nx.NetworkXError("Input is not a valid edge list") from e

    # Pandas DataFrame
    try:
        import pandas as pd
        if isinstance(data, pd.DataFrame):
            # A square frame is treated as an adjacency matrix,
            # anything else as an edge list with one row per edge.
            if data.shape[0] == data.shape[1]:
                try:
                    return nx.from_pandas_adjacency(data, create_using=create_using)
                except Exception as e:
                    msg = "Input is not a correct Pandas DataFrame adjacency matrix."
                    raise nx.NetworkXError(msg) from e
            else:
                try:
                    return nx.from_pandas_edgelist(data, edge_attr=True, create_using=create_using)
                except Exception as e:
                    msg = "Input is not a correct Pandas DataFrame edge-list."
                    raise nx.NetworkXError(msg) from e
    except ImportError:
        msg = 'pandas not found, skipping conversion test.'
        warnings.warn(msg, ImportWarning)

    # numpy matrix or ndarray
    try:
        import numpy
        if isinstance(data, (numpy.matrix, numpy.ndarray)):
            try:
                return nx.from_numpy_matrix(data, create_using=create_using)
            except Exception as e:
                raise nx.NetworkXError(
                    "Input is not a correct numpy matrix or array.") from e
    except ImportError:
        warnings.warn('numpy not found, skipping conversion test.',
                      ImportWarning)

    # scipy sparse matrix - any format
    try:
        import scipy  # imported only to detect whether scipy is available
        if hasattr(data, "format"):
            try:
                return nx.from_scipy_sparse_matrix(data, create_using=create_using)
            except Exception as e:
                raise nx.NetworkXError(
                    "Input is not a correct scipy sparse matrix type.") from e
    except ImportError:
        warnings.warn('scipy not found, skipping conversion test.',
                      ImportWarning)

    raise nx.NetworkXError(
        "Input is not a known data type for conversion.")
Пример #19
0
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt


# Read the pair list twice with the column names swapped, so that the
# concatenation holds every pair in both directions and the crosstab below
# comes out symmetric.
pairs1 = pd.read_csv('city_pairs.csv', names=['city1', 'city2'])
pairs2 = pd.read_csv('city_pairs.csv', names=['city2', 'city1'])
pairs = pd.concat([pairs1, pairs2])

# create adjacency matrix
adjacency = pd.crosstab(pairs['city1'], pairs['city2'])

# create graph object with NetworkX library
g = nx.from_pandas_adjacency(adjacency)

# run PageRank algorithm and add result to matrix
rank = nx.pagerank(g, alpha=0.85)
adjacency['rank'] = pd.Series(rank)

adjacency = adjacency.sort_values(by='rank', ascending=False)

# relabel columns with three-letter abbreviations (spaces removed first)
short = [x.replace(' ', '')[:3] for x in adjacency.columns]
# Assign the labels directly: the previous ``set_axis(1, short)`` call passed
# its arguments in the wrong order and discarded the result.
adjacency.columns = short

print(adjacency)

# color by city state
cities = pd.read_csv('cities.csv', names=['state'], index_col=0)
Пример #20
0
def to_networkx_graph(data, create_using=None, multigraph_input=False):
    """Make a NetworkX graph from a known data structure.

    The preferred way to call this is automatically
    from the class constructor

    >>> d = {0: {1: {"weight": 1}}}  # dict-of-dicts single edge (0,1)
    >>> G = nx.Graph(d)

    instead of the equivalent

    >>> G = nx.from_dict_of_dicts(d)

    Parameters
    ----------
    data : object to be converted

        Current known types are:
         any NetworkX graph
         dict-of-dicts
         dict-of-lists
         container (e.g. set, list, tuple) of edges
         iterator (e.g. itertools.chain) that produces edges
         generator of edges
         Pandas DataFrame (square: adjacency matrix, otherwise row per edge)
         numpy matrix
         numpy ndarray
         scipy sparse matrix
         pygraphviz agraph

    create_using : NetworkX graph constructor, optional (default=nx.Graph)
        Graph type to create. If graph instance, then cleared before populated.

    multigraph_input : bool (default False)
        If True and  data is a dict_of_dicts,
        try to create a multigraph assuming dict_of_dict_of_lists.
        If data and create_using are both multigraphs then create
        a multigraph from a multigraph.

    Raises
    ------
    nx.NetworkXError
        If `data` looks like one of the known types but the conversion
        fails, or if `data` matches none of the known types.
    TypeError
        If `data` is a dict that converts neither as a dict-of-dicts nor
        as a dict-of-lists.

    """
    # NX graph
    if hasattr(data, "adj"):
        try:
            result = from_dict_of_dicts(
                data.adj,
                create_using=create_using,
                multigraph_input=data.is_multigraph(),
            )
            if hasattr(data, "graph"):  # data.graph should be dict-like
                result.graph.update(data.graph)
            if hasattr(data, "nodes"):  # data.nodes should be dict-like
                # result.add_node_from(data.nodes.items()) possible but
                # for custom node_attr_dict_factory which may be hashable
                # will be unexpected behavior
                for n, dd in data.nodes.items():
                    result._node[n].update(dd)
            return result
        except Exception as e:
            raise nx.NetworkXError(
                "Input is not a correct NetworkX graph.") from e

    # pygraphviz  agraph
    if hasattr(data, "is_strict"):
        try:
            return nx.nx_agraph.from_agraph(data, create_using=create_using)
        except Exception as e:
            raise nx.NetworkXError(
                "Input is not a correct pygraphviz graph.") from e

    # dict of dicts/lists
    if isinstance(data, dict):
        try:
            return from_dict_of_dicts(data,
                                      create_using=create_using,
                                      multigraph_input=multigraph_input)
        except Exception:
            # Not a dict-of-dicts; fall back to dict-of-lists. ``Exception``
            # (not a bare ``except``) lets KeyboardInterrupt/SystemExit through.
            try:
                return from_dict_of_lists(data, create_using=create_using)
            except Exception as e:
                raise TypeError("Input is not known type.") from e

    # Pandas DataFrame
    try:
        import pandas as pd

        if isinstance(data, pd.DataFrame):
            # A square frame is treated as an adjacency matrix,
            # anything else as an edge list with one row per edge.
            if data.shape[0] == data.shape[1]:
                try:
                    return nx.from_pandas_adjacency(data,
                                                    create_using=create_using)
                except Exception as e:
                    msg = "Input is not a correct Pandas DataFrame adjacency matrix."
                    raise nx.NetworkXError(msg) from e
            else:
                try:
                    return nx.from_pandas_edgelist(data,
                                                   edge_attr=True,
                                                   create_using=create_using)
                except Exception as e:
                    msg = "Input is not a correct Pandas DataFrame edge-list."
                    raise nx.NetworkXError(msg) from e
    except ImportError:
        msg = "pandas not found, skipping conversion test."
        warnings.warn(msg, ImportWarning)

    # numpy matrix or ndarray
    try:
        import numpy

        if isinstance(data, (numpy.matrix, numpy.ndarray)):
            try:
                return nx.from_numpy_matrix(data, create_using=create_using)
            except Exception as e:
                raise nx.NetworkXError(
                    "Input is not a correct numpy matrix or array.") from e
    except ImportError:
        warnings.warn("numpy not found, skipping conversion test.",
                      ImportWarning)

    # scipy sparse matrix - any format
    try:
        import scipy  # imported only to detect whether scipy is available

        if hasattr(data, "format"):
            try:
                return nx.from_scipy_sparse_matrix(data,
                                                   create_using=create_using)
            except Exception as e:
                raise nx.NetworkXError(
                    "Input is not a correct scipy sparse matrix type.") from e
    except ImportError:
        warnings.warn("scipy not found, skipping conversion test.",
                      ImportWarning)

    # Note: most general check - should remain last in order of execution
    # Includes containers (e.g. list, set, dict, etc.), generators, and
    # iterators (e.g. itertools.chain) of edges

    if isinstance(data, (Collection, Generator, Iterator)):
        try:
            return from_edgelist(data, create_using=create_using)
        except Exception as e:
            raise nx.NetworkXError("Input is not a valid edge list") from e

    raise nx.NetworkXError("Input is not a known data type for conversion.")
Пример #21
0
def test_clf(seed_labels,
             y_true,
             A,
             true_prob=None,
             avg='micro',
             clf=local_and_global_consistency,
             kl_opt=False):
    """Test a semi-supervised node classification against some known
    node -> label mapping

    For every threshold returned by ``get_threslist(A)``, entries of `A` not
    above the threshold are zeroed, a graph is built from the result, the
    seed labels are attached to its nodes and `clf` is run on it.
    Precision, recall and F-score are averaged over categories according
    to `avg`.

    Parameters
    ----------
    seed_labels: dict
        contains initial "seed" nodes and their seed_labels
    y_true: pandas.Series
        must have a defined (ordered) pandas.CategoricalDtype
    A: array
        pandas matrix to build/classify graph (rows/columns are node names).
    true_prob: pandas.DataFrame, optional
        reference probabilities; when given, a temperature for a softmax
        over ``pred[true_prob.columns]`` is fitted per threshold by
        minimising the summed KL divergence, and the divergences and
        probabilities are recorded.
        (presumably rows are nodes and columns are categories -- confirm)
    avg: str
        ``average`` argument forwarded to ``precision_recall_fscore_support``.
    clf: function
        one of the networkx.node_classification algorithms. Defaults to
        `local_and_global_consistency`. Must return ``(pred_labels, pred)``.
    kl_opt: bool
        if True the optimal threshold minimises the summed KL divergence
        instead of maximising the F-score. Only meaningful together with
        `true_prob`, since otherwise no KL values are collected.

    Returns
    -------
    dict
        keys ``x``, ``p``, ``r``, ``t``, ``f``, ``opt_pos``, ``thres``,
        ``kl_vals``, ``aps`` and, when `true_prob` is given, ``probs``.
    """
    t = get_threslist(A)
    cat = pd.CategoricalDtype(y_true.cat.categories, ordered=True)

    p, r, f, kl_vals, probs = [], [], [], [], []

    for ti in t:
        A_i = A.where(A > ti, other=0)
        G = nx.from_pandas_adjacency(A_i, create_using=nx.Graph)

        # ``G.nodes`` replaces the ``G.node`` alias that newer NetworkX
        # releases no longer provide.
        for k, v in seed_labels.items():
            if k in G.nodes:
                G.nodes[k]['label'] = v

        pred_labels, pred = clf(G)

        if true_prob is not None:

            def kl_div(λ):
                prob = softmax(λ * pred[true_prob.columns])
                kl = entropy(true_prob.T, prob.T)
                return np.nansum(kl)

            opt_kl_temp = minimize_scalar(kl_div)

            K = opt_kl_temp['x']
            # K = 500.
            pred_prob = softmax(K * pred[true_prob.columns])

            kl_vals.append(entropy(true_prob.T, pred_prob.T))
            probs.append(pred_prob)

        y_pred = pd.Series(pred_labels).astype(cat)
        # NOTE: the label set is hard-coded to three categories.
        pi, ri, fi, _ = precision_recall_fscore_support(y_true.cat.codes,
                                                        y_pred.cat.codes,
                                                        average=avg,
                                                        labels=[0, 1, 2])
        p.append(pi)
        r.append(ri)
        f.append(fi)

    if kl_opt:
        opt_pos = np.argmin([np.sum(i) for i in kl_vals])
    else:
        opt_pos = np.argmax(f)

    d = dict(
        x=A,
        p=np.array(p),
        r=np.array(r),
        t=t,
        f=np.array(f),
        opt_pos=opt_pos,
        thres=A.where(A > t[opt_pos], other=0),
        kl_vals=np.array(kl_vals),
    )

    # Sum of precision times the change in recall between consecutive thresholds.
    d['aps'] = (np.diff(d['r']) * d['p'][1:]).sum()

    if true_prob is not None:
        d['probs'] = probs[opt_pos]

    return d
    https://colab.research.google.com/drive/1KERj8-aGjiBXC4fW_8dfG-F9ld_CAcBp
"""

import pandas as pd
import networkx as nx
phases = {}
G = {}
# Read the adjacency matrix of each of the 11 phases and turn it into a
# directed graph named after the phase.
for i in range(1, 12):
    var_name = "phase" + str(i)
    file_name = "https://raw.githubusercontent.com/ragini30/Networks-Homework/main/" + var_name + ".csv"

    #   file_name = "../data/CAVIAR/" + var_name + ".csv"
    frame = phases[i] = pd.read_csv(file_name, index_col=["players"])
    # Prefix every column label with "n" and reuse the labels as the index so
    # that rows and columns carry the same node names.
    frame.columns = "n" + frame.columns
    frame.index = frame.columns
    graph = G[i] = nx.from_pandas_adjacency(frame, create_using=nx.DiGraph())
    graph.name = var_name

# Report the HITS hub and authority scores of nodes n1 and n3 in every phase.
for i in range(1, 12):
    hubs, authorities = nx.algorithms.link_analysis.hits(G[i], max_iter=1000000)
    print("phase {0} n1 hubs {1} auth {2}.".format(i, hubs['n1'], authorities['n1']))
    print("n3 hubs {1} auth {2}.".format(i, hubs['n3'], authorities['n3']))

!apt-get install graphviz graphviz-dev
!pip install pygraphviz

import matplotlib.pyplot as plt
# One figure per phase, titled with the phase number and laid out by graphviz.
for i in range(1, 12):
    fig = plt.figure(i)
    fig.suptitle(i)
    layout = nx.drawing.nx_agraph.graphviz_layout(G[i])
    nx.draw(G[i], pos=layout, with_labels=True)
Пример #23
0
# Map every label through name_map.
labels = np.array(itemgetter(*labels)(name_map))
# %% [markdown]
# #

blockmodel_df = get_blockmodel_df(
    A, labels, use_weights=True, return_counts=False
)
sns.heatmap(blockmodel_df, annot=True, cmap="Reds")

# %% [markdown]
# # make the networkx graph

from graspy.embed import AdjacencySpectralEmbed

g = nx.from_pandas_adjacency(blockmodel_df, create_using=nx.DiGraph())
uni_labels, counts = np.unique(labels, return_counts=True)

# node size: number of members of each label, scaled
size_scaler = 5
size_map = {label: size for label, size in zip(uni_labels, size_scaler * counts)}
nx.set_node_attributes(g, size_map, name="Size")

# signal flow of the block-level adjacency matrix
adj = nx.to_numpy_array(g, nodelist=uni_labels)
node_signal_flow = signal_flow(adj)
sf_map = {label: flow for label, flow in zip(uni_labels, node_signal_flow)}
nx.set_node_attributes(g, sf_map, name="Signal Flow")

# one-dimensional spectral embedding of the symmetrized adjacency matrix
sym_adj = symmetrize(adj)
embedder = AdjacencySpectralEmbed(n_components=1)
node_lap = np.squeeze(embedder.fit_transform(sym_adj))
lap_map = {label: coord for label, coord in zip(uni_labels, node_lap)}
nx.set_node_attributes(g, lap_map, name="Laplacian-2")

# one palette colour per label
color_map = {label: colour for label, colour in zip(uni_labels, cc.glasbey_light)}
nx.set_node_attributes(g, color_map, name="Color")
Пример #24
0
def to_minigraph(
    adj,
    labels,
    drop_neg=True,
    remove_diag=True,
    size_scaler=1,
    use_counts=False,
    use_weights=True,
    color_map=None,
):
    """Convert an adjacency matrix and a partition into a block-level graph.

    Parameters
    ----------
    adj
        Adjacency matrix, forwarded to ``get_blockmodel_df``.
    labels
        Partition label of every node, forwarded to ``get_blockmodel_df``
        and used to derive the block sizes.
    drop_neg : bool
        If True, drop the row and column labelled ``"-1"`` when present.
    remove_diag : bool
        If True, zero the diagonal of the block matrix before building
        the graph.
    size_scaler
        Factor applied to the per-label counts for the ``Size`` attribute.
    use_counts, use_weights
        Forwarded to ``get_blockmodel_df`` as ``return_counts`` and
        ``use_weights``.
    color_map : dict, optional
        Mapping label -> colour. Defaults to pairing the unique labels with
        ``cc.glasbey_light``.

    Returns
    -------
    networkx.DiGraph
        Graph with node attributes ``Size``, ``Signal Flow``,
        ``AdjEvec-0`` ... ``AdjEvec-9``, ``Spring-x``, ``Spring-y`` and
        ``Color``.
    """
    # convert the adjacency and a partition to a minigraph based on SBM probs
    prob_df = get_blockmodel_df(adj,
                                labels,
                                return_counts=use_counts,
                                use_weights=use_weights)
    if drop_neg and ("-1" in prob_df.index):
        prob_df.drop("-1", axis=0, inplace=True)
        prob_df.drop("-1", axis=1, inplace=True)

    if remove_diag:
        # Build a new frame instead of mutating ``prob_df.values`` in place:
        # ``.values`` is not guaranteed to be a writable view of the frame.
        prob_df = prob_df - np.diag(np.diag(prob_df.values))

    g = nx.from_pandas_adjacency(prob_df, create_using=nx.DiGraph())
    uni_labels, counts = np.unique(labels, return_counts=True)

    # add size attribute base on number of vertices
    size_map = dict(zip(uni_labels, size_scaler * counts))
    nx.set_node_attributes(g, size_map, name="Size")

    # add signal flow attribute (for the minigraph itself)
    mini_adj = nx.to_numpy_array(g, nodelist=uni_labels)
    node_signal_flow = signal_flow(mini_adj)
    sf_map = dict(zip(uni_labels, node_signal_flow))
    nx.set_node_attributes(g, sf_map, name="Signal Flow")

    # add spectral properties
    sym_adj = symmetrize(mini_adj)
    n_components = 10
    latent = AdjacencySpectralEmbed(
        n_components=n_components).fit_transform(sym_adj)
    for i in range(n_components):
        latent_dim = latent[:, i]
        lap_map = dict(zip(uni_labels, latent_dim))
        nx.set_node_attributes(g, lap_map, name=f"AdjEvec-{i}")

    # add spring layout properties
    pos = nx.spring_layout(g)
    spring_x = {key: val[0] for key, val in pos.items()}
    spring_y = {key: val[1] for key, val in pos.items()}
    nx.set_node_attributes(g, spring_x, name="Spring-x")
    nx.set_node_attributes(g, spring_y, name="Spring-y")

    # add colors
    if color_map is None:
        color_map = dict(zip(uni_labels, cc.glasbey_light))
    nx.set_node_attributes(g, color_map, name="Color")
    return g
Пример #25
0
    # df = df.iloc[:5, :5].copy()
    n_nodes = df.shape[0]

    # Rename Nodes (node names must start from 0)
    index_bkp = df.index
    columns_bkp = df.columns
    df.index = range(0, n_nodes)
    df.columns = range(0, n_nodes)

    df.columns = df.index
    nodelist = df.index.values

    # Build Networkx object
    print('--- Building Network ---')
    # C for Correlation network
    G = nx.from_pandas_adjacency(df, create_using=nx.Graph)

    # P for Proximity network (which in this case is a Correlation)
    P = [w for i, j, w in G.edges.data('weight')]

    # Converts (P)roximity to (D)istance using a map.
    D_dict = dict(zip(G.edges(), map(prox2dist, P)))
    # Set the distance value for each edge
    nx.set_edge_attributes(G, name='distance', values=D_dict)
    # Compute closure (Using the Dijkstra Class directly)
    print('--- Computing Dijkstra APSP ---')
    dij = Dijkstra.from_edgelist(D_dict, directed=False, verbose=10)
    # Serial Computation
    poolresults = list(range(len(dij.N)))
    for node in dij.N:
        print('> Dijkstra node %s of %s' % (node + 1, len(dij.N)))
Пример #26
0
(Projection from a bipartite network.)
'''

# Count the columns from the header line; the context manager closes the
# file handle again (the previous bare ``open`` never did).
with open("PREVERE1.csv") as f:
    ncols = len(f.readline().split(','))
df = pd.read_csv(
    "PREVERE1.csv", skiprows=1, usecols=range(1, ncols), header=None
)  # skips the top row in the file and only uses columns 1 through ncols - 1 (0-indexed). Tells pandas not to expect a header
print(df)  # notice the column names are 1, ...., ncols-1
col_names = {
    i: i - 1
    for i in range(1, ncols)
}  # Column names must match the row index labels, which is what from_pandas_adjacency expects.
df = df.rename(columns=col_names
               )  # Renaming columns so that column names match column indices.
G = nx.from_pandas_adjacency(
    df)  # Makes a network from the pandas adjacency matrix

# Visualize the network
pos = nx.spring_layout(G)
plt.figure(
    figsize=(50, 50)
)  # Make figure bigger so we can actually see all the edges! (See what happens otherwise by calling nx.draw(G) before this line.)
nx.draw(G, pos, node_size=25)
plt.show()

# How should we visualize the edge weights?
# Access edge data and assign colors according to edge weight. (This is just one option for visualizing edge weights.)
color = ['k', 'y', 'r']  # k means black, y means yellow, r means red.
for u, v in G.edges():  # Iterate through all edges (u, v).
    G[u][v][
        'weight']  # weight of edge (u,v). This edge attribute is already stored in G, because it was in the df adjacency matrix that we created the network from.
Пример #27
0
    def sparce_invcov(self,
                      df,
                      cols=None,
                      style="GraphLassoCV",
                      param=0.2,
                      layout="circular",
                      center=None,
                      figsize=(7, 7)):
        """
        Sparse covariance matrix estimation.

        Fits a precision (inverse covariance) model to the normalized data,
        reduces it to a sparse indicator matrix, draws that matrix as a
        graph and as a heatmap, and returns it.

        cols: columns to calculate. If None, takes all numerical columns
        style: GraphLassoCV or LedoitWolf (any value other than
            "GraphLassoCV" selects LedoitWolf)
        param: Parameter to pass to fitting algorithm. If GraphLasso, =alpha; if LedoitWolf, =threshold
        layout: choose between "circular", "spring", "shell"
        center: Put a certain colname in the center of the graph
        figsize: size of the graph figure

        Returns a DataFrame indexed by the selected columns on both axes,
        with 1 on the diagonal, -1 where the precision matrix is non-zero
        (GraphLassoCV) or exceeds `param` in absolute value (LedoitWolf),
        and 0 elsewhere.
        """
        new_df = Utility().normalize(df).dropna()  # Remove NA, normalize
        # ``is None`` rather than ``== None``: the equality test is evaluated
        # element-wise for array-like ``cols`` and cannot be used in an ``if``.
        if cols is None:
            cols = df._get_numeric_data().columns
        data = new_df[cols]
        if style == "GraphLassoCV":
            model = GraphicalLassoCV(alphas=[param, param],
                                     cv=10,
                                     max_iter=5000)
            model.fit(data)
            sparce_mat = np.zeros(np.shape(model.precision_))
            sparce_mat[model.precision_ != 0] = -1
        else:  # Style == LedoitWolf
            model = LedoitWolf()
            model.fit(data)
            sparce_mat = np.zeros(np.shape(model.get_precision()))
            sparce_mat[np.abs(model.get_precision()) > param] = -1
        # Both branches end with ones on the diagonal.
        np.fill_diagonal(sparce_mat, 1)
        sparce_mat = pd.DataFrame(sparce_mat,
                                  index=data.columns,
                                  columns=data.columns)

        # NetworkX Graph
        fig, ax = plt.subplots(figsize=figsize)
        G = nx.from_pandas_adjacency(sparce_mat)

        pos = {
            "circular": nx.drawing.circular_layout,
            "shell": nx.drawing.shell_layout,
            "spring": nx.drawing.spring_layout,
        }[layout](G, scale=2)
        # Only pin a node to the origin when a center was actually requested;
        # otherwise a stray ``None`` key would be added to the layout.
        if center is not None:
            pos[center] = np.array([0, 0])
        # Every node gets the same fill colour; only the size marks the center.
        node_color = ['mintcream' for _ in G.nodes]
        node_size = [
            len(node) * 1500 if node == center else len(node) * 500
            for node in G.nodes()
        ]
        nodes = nx.draw_networkx_nodes(G,
                                       pos,
                                       node_shape='o',
                                       node_color=node_color,
                                       node_size=node_size)
        nodes.set_edgecolor('k')
        nx.draw_networkx_edges(G, pos, edge_color='r', width=2.0, alpha=0.8)
        nx.draw_networkx_labels(G, pos, font_weight='bold', font_size=10)
        plt.axis('off')
        plt.tight_layout()

        # Display precision matrix as heatmap
        fig, ax = plt.subplots(figsize=(5, 5))
        sns.heatmap(sparce_mat,
                    vmax=1,
                    vmin=-1,
                    linewidth=0.1,
                    cmap=plt.cm.RdBu_r,
                    cbar=False)
        ax.set_ylim(sparce_mat.T.shape[0] - 1e-9, -1e-9)
        plt.title('Sparse Inverse Covariance')
        plt.show()

        return sparce_mat
Пример #28
0
import pandas as pd
import networkx as nx

# adjacency matrix from BibExcel
# Load the semicolon-separated matrix; the first column holds the row labels.
df = pd.read_csv('filename.csv', index_col=0, sep=';')
G = nx.from_pandas_adjacency(df)
G.name = 'Name'
# Print the summary that networkx produces for the graph.
summary = nx.info(G)
print(summary)
Пример #29
0
import pandas as pd

import networkx as nx

# 3x3 weighted adjacency matrix over the nodes a, b and c.
node_names = ['a', 'b', 'c']
df = pd.DataFrame(
    {
        'a': [0.1, 0.2, 0.8],
        'b': [0.4, 0.3, 0.9],
        'c': [0.6, 0.7, 0.5],
    },
    index=node_names,
    columns=node_names,
)
print(df)

# Directed graph built from the matrix, and the adjacency matrix of its
# stochastic version.
g = nx.from_pandas_adjacency(df, create_using=nx.DiGraph)
stochastic = nx.stochastic_graph(g)
sdf = nx.to_pandas_adjacency(stochastic)
print(sdf)

# PageRank of the original graph with alpha=1, printed to three decimals.
pr = nx.pagerank_numpy(g, alpha=1)
print(f'\na:{pr["a"]:.3f}\nb:{pr["b"]:.3f}\nc:{pr["c"]:.3f}')
Пример #30
0
def create_graph_from_invcov(invcov,
                             p,
                             q,
                             title="",
                             subtitle="",
                             size=500,
                             remove_nodes=True,
                             labels=None,
                             show_weights=True):
    """Creates networkx graph from inverse covariance matrix and returns a bokeh plot of it.

    Arguments:
        invcov {np.ndarray} -- Inverse Covariance matrix of a graph (must be square)
        p {int} -- Nodes 0..p-1 are placed on the first shell of the layout
        q {int} -- Nodes p..p+q-1 are placed on the second shell of the layout

    Keyword Arguments:
        title {str} -- Optional Title of the plot (default: {""})
        subtitle {str} -- Optional italic subtitle of the plot (default: {""})
        size {int} -- Width and height of the plot (default: {500})
        remove_nodes {bool} -- Drop isolated nodes before plotting (default: {True})
        labels -- Optional node names, indexed by node number; indexed with
            ``np.where`` output, so presumably a numpy array (default: {None})
        show_weights {bool} -- Colour edges by sign and scale their width by
            absolute weight instead of drawing uniform grey lines (default: {True})

    Returns:
        The bokeh ``Plot`` containing the graph renderer.
    """
    assert (invcov.shape[0] == invcov.shape[1])
    invcov = pd.DataFrame(invcov)

    G = nx.from_pandas_adjacency(invcov)

    if (remove_nodes):
        G.remove_nodes_from(list(nx.isolates(G)))

    graph_renderer = from_networkx(
        G, nx.shell_layout, nlist=[list(range(p)),
                                   list(range(p, p + q))])

    graph_renderer.node_renderer.data_source.add([(k < p) * max(k, 1)
                                                  for k, v in G.degree()],
                                                 'is_p')
    graph_renderer.node_renderer.data_source.add(
        [min(10 + v, 20) for k, v in G.degree()], 'degree')
    # mapper = LinearColorMapper(palette=linear_palette(Spectral, p+1), low=0, high=p)
    mapper = LinearColorMapper(palette=linear_palette(Spectral4, 2),
                               low=0,
                               high=1)

    graph_renderer.node_renderer.glyph = Circle(size='degree',
                                                fill_color={
                                                    'field': 'is_p',
                                                    'transform': mapper
                                                })
    graph_renderer.node_renderer.selection_glyph = Circle(
        size='degree', fill_color=Spectral4[3])
    graph_renderer.node_renderer.hover_glyph = Circle(size='degree',
                                                      fill_color=Spectral4[3])

    edge_mapper = LinearColorMapper(palette=['pink', 'palegreen'],
                                    low=-1,
                                    high=1)

    edge_vals = [G.get_edge_data(u, v)["weight"] for u, v in G.edges]
    # Line width: absolute weight clamped from below at 0.5, scaled by 4 and capped at 6.
    edge_weights = [
        min(max(abs(G.get_edge_data(u, v)["weight"]), 0.5) * 4, 6)
        for u, v in G.edges
    ]
    graph_renderer.edge_renderer.data_source.add(edge_weights, 'weights')
    graph_renderer.edge_renderer.data_source.add(np.sign(edge_vals), 'signs')

    if (show_weights):
        graph_renderer.edge_renderer.glyph = MultiLine(line_color={
            'field': 'signs',
            'transform': edge_mapper
        },
                                                       line_alpha=1.0,
                                                       line_width='weights')
    else:
        graph_renderer.edge_renderer.glyph = MultiLine(line_color="#CCCCCC",
                                                       line_alpha=1.0,
                                                       line_width=1)

    graph_renderer.edge_renderer.selection_glyph = MultiLine(
        line_color='black', line_width='weights')
    graph_renderer.edge_renderer.hover_glyph = MultiLine(line_color="#CCCCCC",
                                                         line_width='weights')

    graph_renderer.selection_policy = NodesAndLinkedEdges()
    graph_renderer.inspection_policy = NodesAndLinkedEdges()

    # A node's neighbours are all indices with a non-zero entry in its row or column.
    if (labels is not None):
        tmp = [labels[i] for i in G.nodes]
        graph_renderer.node_renderer.data_source.data['name'] = tmp
        adj = np.array(np.abs(invcov) > 0)
        # Materialise each neighbour set as a plain list: writing the
        # "<br/>"-prefixed names back into a fixed-width numpy string array
        # (as the line-break loop below does) would truncate them.
        neighbours = [
            list(labels[np.where(np.logical_or(adj[i, :], adj[:, i]))])
            for i in G.nodes
        ]

        graph_renderer.node_renderer.data_source.data[
            'neighbours'] = neighbours

        tooltips = [("idx:", "@index"), ("Name:", "@name"),
                    ("Neighbours:", "@neighbours{safe}")]
    else:
        neighbours = []
        adj = np.array(np.abs(invcov) > 0)
        for i in list(G.nodes):
            neighbours.append(
                list(map(str,
                         np.where(np.logical_or(adj[i, :], adj[:, i]))[0])))
        graph_renderer.node_renderer.data_source.data[
            'neighbours'] = neighbours
        tooltips = [("idx:", "@index"), ("Neighbours:", "@neighbours{safe}")]

    # add line breaks
    # (mutates the lists already stored in the data source, so the tooltip
    # shows long neighbour lists three entries per line)
    for n in neighbours:
        if len(n) > 3:
            for i in range(len(n)):
                if (i % 3 == 0):
                    n[i] = "<br/>" + n[i]

    bokeh_pl = Plot(plot_width=size,
                    plot_height=size,
                    x_range=Range1d(-1.1, 1.1),
                    y_range=Range1d(-1.1, 1.1))
    # bokeh_pl.title.text = title
    bokeh_pl.add_layout(Title(text=subtitle, text_font_style="italic"),
                        'above')
    bokeh_pl.add_layout(Title(text=title, text_font_size="16pt"), 'above')

    # bokeh_pl.add_tools(HoverTool(tooltips=None), TapTool(), BoxSelectTool())
    hover = HoverTool(tooltips=tooltips)
    bokeh_pl.add_tools(hover, TapTool(), BoxSelectTool())

    bokeh_pl.renderers.append(graph_renderer)

    return (bokeh_pl)
Пример #31
0
                val = val - hourRem + 1
            except:
                val = 0
            dfs[myTime].iloc[loc_df1, loc_df2] = val


def most_central_edge(G):
    """Return the key with the largest score from ``betweenness(G, weight='weight')``.

    NOTE(review): ``betweenness`` is presumably an edge-betweenness function,
    which would make the returned key an edge -- confirm against the import.
    """
    scores = betweenness(G, weight='weight')
    return max(scores, key=lambda item: scores.get(item))


# Build a graph from each of the four adjacency frames and plot its
# degree histogram.
for value in range(4):
    print(value)

    G = nx.from_pandas_adjacency(dfs[value])

    # Degree distribution
    if value != -1:
        # degrees in descending order
        degree_sequence = sorted((d for n, d in G.degree()), reverse=True)
        degreeCount = collections.Counter(degree_sequence)
        deg, cnt = zip(*degreeCount.items())

        fig, ax = plt.subplots(figsize=(15, 10))
        plt.bar(deg, cnt, width=0.80, color='b')

        plt.title("Degree Histogram")
        plt.ylabel("Count")
        plt.xlabel("Degree")
# In[ ]:


# This needs more work. The adjacency (crosstab) matrix does not conform to the n x m shape, and does not make sense logically either!
#print dfx_ct1.shape  # n samples with m dimensions
#plt.scatter(dfx_ct1.loc['165.130.144.83'],dfx_ct1.loc['10.66.34.33'])
#plt.show()


# In[12]:


import networkx as nx
# Build the graph straight from the adjacency dataframe; the empty
# ``nx.Graph()`` that used to be created first was overwritten immediately.
G = nx.from_pandas_adjacency(dfx_ct1)
G.name = 'Graph from dfx_ct1 adjacency matrix dataframe'


# In[13]:


print(nx.info(G))


# In[15]:


# Directed copy of the graph.
H = nx.DiGraph(G)

Пример #33
0
import plotly.figure_factory as ff
import plotly.io as pio

# Load the word adjacency matrix and the word table; in both CSV files the
# first column is used as the index.
df_adjacency_matrix = pd.read_csv(
    r".\3.Word Matrices\WordsAdjacencyMatrix.csv",
    index_col=0,
)
df_words = pd.read_csv(
    r".\3.Word Matrices\WordsFrequencyShortened.csv",
    index_col=0,
)

# Debug toggles left by the author for working on a small slice of the data:
# df_adjacency_matrix= df_adjacency_matrix.iloc[200:205,200:205]#Todo: To remove at the end
# df_adjacency_matrix= df_adjacency_matrix.iloc[200:250,200:250]#Todo: To remove at the end

# nodes_list=df_words.index.tolist()[200:205]
# nodes_list=df_words.index.tolist()[200:250]
nodes_list = df_words.index.tolist()
# Each word is labelled with itself.
nodes_labels = {word: word for word in nodes_list}


# Build the graph from the adjacency matrix, print its summary and show a
# quick labelled drawing.
G = nx.from_pandas_adjacency(df_adjacency_matrix)
G.name = 'Graph from pandas adjacency matrix'
print(nx.info(G))
nx.draw(G, node_size=20, with_labels=True)
plt.show()

# Compute the best partition (community detection). Author's note: tune
# `resolution` (max=1.0) to get fewer (higher value) or more communities.
partition = community_louvain.best_partition(G, resolution=0.2)


# Draw the graph: hide the axes and compute node positions.
plt.axis('off')

pos = nx.spring_layout(G)  # initial layout choice
# pos = nx.circular_layout(G)
Пример #34
0
def _signed_log_scale(coord, power, base):
    """Return ``log_base(|coord| ** power)``, negated when ``coord`` is negative.

    A coordinate whose powered magnitude is 0 is mapped to 0.0, because
    ``math.log`` raises ``ValueError`` for 0.
    """
    if coord == 0:
        return 0.0
    magnitude = abs(coord) ** power
    if magnitude == 0:
        # Tiny coordinates can underflow to 0.0 once raised to `power`.
        return 0.0
    scaled = math.log(magnitude, base)
    return -scaled if coord < 0 else scaled


def gen_gexf_and_pass_partition_data(
        data,
        stopwords=None,
        word_len_min=2,
        node_num=30,
        edge_remove_threshold=0,
        remove_isolated_node=True,
        layout='fr',
        iterations=50,
        fr_k=None,
        fa2_square=2,
        fa2_log_base=100,
):
    """Generate gexf file for SNA Interactive and pass partition data.

    Builds a co-occurrence graph from ``data``, reduces it with
    ``get_sub_data``, attaches ``viz`` attributes (node size, edge thickness,
    node position) to the sub-graph, writes it with ``write_gexf`` and finally
    computes a community partition of the sub-graph.

    :param str,list data: String Data (One post per line) | List Data (One post per element)
    :param str stopwords: Stopwords separated ','
    :param int word_len_min: Forwarded to ``get_matrix``.
    :param int node_num: Number of nodes; also limits how many entries of the
        sorted term-frequency list receive a node size.
    :param int edge_remove_threshold: Forwarded to ``get_sub_data``.
    :param bool remove_isolated_node: Forwarded to ``get_sub_data``.
    :param str layout: ``"fr"`` uses ``nx.spring_layout``; ``"fa2"`` uses
        ``ForceAtlas2``. Any other value leaves the nodes without a position.
    :param int iterations: Number of layout iterations.
    :param int fr_k: ``k`` argument of ``nx.spring_layout`` (``"fr"`` only).
    :param int fa2_square: Exponent applied to each absolute ForceAtlas2
        coordinate before taking the logarithm (``"fa2"`` only).
    :param int fa2_log_base: Base of that logarithm (``"fa2"`` only).
    :return: dict with ``'partition_len'`` (largest community label + 1, or 0
        for an empty partition) and ``'top_node_per_klass'`` (list indexed by
        community label holding the node with the largest scaled weight).
    :rtype: dict

    """

    corpus = get_corpus(data=data)
    matrix = get_matrix(corpus=corpus, stopwords=stopwords, word_len_min=word_len_min)
    cooccur_matrix = matrix.get('cooccur_matrix')

    # Get Graph
    graph = nx.from_pandas_adjacency(cooccur_matrix)

    # Get Sub Data
    sub_data = get_sub_data(
        graph=graph,
        node_num=node_num,
        edge_remove_threshold=edge_remove_threshold,
        remove_isolated_node=remove_isolated_node,
        matrix=matrix,
    )
    sub_graph = sub_data.get('sub_graph')
    tf_sum_dict_sorted = sub_data.get('tf_sum_dict_sorted')
    isolated_nodes = sub_data.get('isolated_nodes')

    # ------ Set Attributes for gexf file ------ #
    # Add Node Weight: sizes are scaled relative to the first entry of the
    # sorted frequency list, which gets size 70.
    scaled_weight_dict = {}
    for node, freq in tf_sum_dict_sorted[:node_num]:
        if node in isolated_nodes:
            continue
        scaled_weight_dict[node] = (freq * (70 ** 2) / tf_sum_dict_sorted[0][1])**(1/2)

    for node, scaled_weight in scaled_weight_dict.items():
        sub_graph.nodes[node]['viz'] = {'size': scaled_weight}

    # Add edge weight: thickness is relative to the heaviest edge (35).
    # An edgeless sub-graph is skipped instead of failing on max([]).
    edge_weights = [sub_graph[u][v]['weight'] for u, v in sub_graph.edges]
    if edge_weights:
        edge_weight_max = max(edge_weights)
        for u, v in sub_graph.edges:
            sub_graph[u][v]['viz'] = {'thickness': sub_graph[u][v]['weight'] * 35 / edge_weight_max}

    # ------ Set Layout ------ #
    # `setdefault` keeps the existing 'viz' dict and only creates one for a
    # node that did not receive a size above, instead of raising KeyError.
    # Fruchterman Reingold
    if layout == "fr":
        pos = nx.spring_layout(sub_graph, k=fr_k, iterations=iterations)
        for node in pos:
            node_viz = sub_graph.nodes[node].setdefault('viz', {})
            node_viz['position'] = {'x': pos[node][0], 'y': pos[node][1]}

    # ForceAtlas2
    elif layout == "fa2":
        forceatlas2 = ForceAtlas2()
        pos = forceatlas2.forceatlas2_networkx_layout(sub_graph, iterations=iterations)

        for node in pos:
            raw_x, raw_y = pos[node]
            # -- Scaling Pos -- #
            adj_x = _signed_log_scale(raw_x, fa2_square, fa2_log_base)
            adj_y = _signed_log_scale(raw_y, fa2_square, fa2_log_base)
            # ----------------- #
            node_viz = sub_graph.nodes[node].setdefault('viz', {})
            node_viz['position'] = {'x': adj_x, 'y': adj_y}
    # -------------------------- #

    # Generate gexf file
    write_gexf(graph=sub_graph)

    # ------ Pass partition data to template ------ #
    partition = community.best_partition(sub_graph)
    # default=-1 yields a length of 0 for an empty partition.
    partition_len = max(partition.values(), default=-1) + 1
    node_freq_per_klass = {n: list() for n in range(partition_len)}
    for node, klass in partition.items():
        # A node without a computed size ranks lowest instead of raising KeyError.
        node_freq_per_klass[klass].append((node, scaled_weight_dict.get(node, 0)))

    top_node_per_klass = [None] * partition_len
    for klass, node_freq in node_freq_per_klass.items():
        if node_freq:
            top_node_per_klass[klass] = max(node_freq, key=lambda x: x[1])[0]

    partition_pass_to_template = {
        'partition_len': partition_len,
        'top_node_per_klass': top_node_per_klass,
    }
    return partition_pass_to_template
Пример #35
0
eIdx = 0

# Give every row a short sequential alias: "n<k>" for rows marked 'New' and
# "e<k>" for rows marked 'Existing', each kind with its own counter. Rows with
# any other marker get no alias.
for nOrE in df['New/Existing'].tolist():
    if nOrE == 'New':
        alias = "n{}".format(nIdx)
        nIdx += 1
    elif nOrE == 'Existing':
        alias = "e{}".format(eIdx)
        eIdx += 1
    else:
        continue
    newNaming.append(alias)

df['newNaming'] = newNaming

# Build a square DataFrame labelled by the keys of nameDict on both axes and
# read it as a weighted graph (presumably pairwise distances, going by the
# helper names -- confirm against returnDistMatrix).
nameDict = returnNameToStrainDict(df, loci)
distMat = returnDistMatrix(nameDict, loci)
distMat_df = pd.DataFrame(
    distMat,
    index=nameDict.keys(),
    columns=nameDict.keys(),
)
graph = nx.from_pandas_adjacency(distMat_df)

# Minimum spanning tree, laid out with a seeded spring layout so the picture
# is reproducible.
mst = nx.minimum_spanning_tree(graph)
#node_pos = nx.get_node_attributes(mst,'pos')
node_pos = nx.spring_layout(
    mst,
    k=4 / np.sqrt(mst.number_of_nodes()),
    iterations=80,
    random_state=1992,
)

# Draw the tree with its edge weights as labels and save it to disk.
labels = nx.get_edge_attributes(mst, 'weight')
plt.figure()
nx.draw_networkx_edge_labels(mst, node_pos, edge_labels=labels, font_size=4)
nx.draw_networkx(mst, pos=node_pos, node_size=10, font_size=1, alpha=0.65)
#nx.draw_networkx(mst,pos=node_pos,node_size=90,font_size=5, alpha=0.65)
plt.savefig("MST.png", dpi=350)