Example #1
def plot_chord(G, notebook=False):
    """
    Render an interactive chord plot of the graph.

    If notebook is False, starts a Bokeh app in a browser window;
    if True, renders the plot directly in the notebook cell.
    """

    import scConnect as cn
    import holoviews as hv
    from holoviews import opts, dim
    import networkx as nx
    import pandas as pd
    import numpy as np

    # instantiate the bokeh renderer
    renderer = hv.renderer('bokeh')
    hv.extension("bokeh")
    hv.output(size=250)

    # set visuals
    opts.defaults(
        opts.Chord(
            node_cmap='Category20',
            edge_cmap='Category20',
            edge_color=dim("source"),
            labels='cluster',
            node_color=dim('cluster'),
            inspection_policy="edges",
            toolbar="above",
        ))

    # Create the Dataset object to be passed to the Chord object (NOTE: aggregates the data
    # leaving only one edge per cluster pair)
    edges = nx.to_pandas_edgelist(G)
    links = hv.Dataset(edges, ["source", "target"],
                       ["weighted_score", "loyalty"]).sort(
                           by="source").aggregate(function=np.sum)
    nodes = hv.Dataset(list(G.nodes), 'cluster').sort(by="cluster")

    # Calculate values for threshold representing given percentiles
    percentiles = [0, 20, 40, 60, 80, 90, 95, 99]
    th_values = np.percentile(links.data["weighted_score"], percentiles)

    th = hv.Dimension(("th", "weighted scores threshold"),
                      default=th_values[0])

    # Filter data on threshold, and return a chord element
    def chord_graph(th):
        links_ = links.select(weighted_score=(th, None))
        chord = hv.Chord((links_, nodes))

        return chord

    # instantiate the dynamic map
    chord = hv.DynamicMap(chord_graph, kdims=[th]).redim.values(th=th_values)

    # Run the server if not in a notebook, otherwise return the plot for inline display
    if not notebook:
        server = renderer.app(chord, show=True, new_window=True)
    else:
        return chord
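
A minimal, hypothetical usage sketch (the graph, cluster names and edge attributes below are illustrative; plot_chord only assumes a networkx graph whose edges carry weighted_score and loyalty attributes):

import networkx as nx

G = nx.DiGraph()
G.add_edge("cluster A", "cluster B", weighted_score=2.5, loyalty=0.8)
G.add_edge("cluster B", "cluster C", weighted_score=1.0, loyalty=0.4)
G.add_edge("cluster A", "cluster C", weighted_score=0.5, loyalty=0.9)

# In a notebook cell this returns a DynamicMap with a threshold slider
chord = plot_chord(G, notebook=True)
chord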
Example #2
def generate_chord_diagram(responses_count, thr_count=5):

    # generate dataframes as required for the plotting function
    plot_data = responses_count.loc[responses_count['count'] > 0,
                                    ['index', 'target', 'count']]
    plot_data.columns = ['source', 'target', 'value']
    plot_data.index = np.arange(len(plot_data))

    nodes = responses_count.loc[responses_count['count']>0, ['index', 'screen_name', 'party']].\
                            drop_duplicates().set_index('index').sort_index(level=0)
    nodes = hv.Dataset(nodes, 'index')
    nodes.data.head()

    # generate colormap for single accounts according to party affiliations
    person_party_cmap = dict(
        zip(responses_count['index'],
            responses_count['party'].apply(lambda row: party_cmap[row])))

    # generate plot
    chord = hv.Chord((plot_data, nodes)).select(value=(thr_count, None))
    chord.opts(
        hv_opts.Chord(cmap=party_cmap,
                      edge_cmap=person_party_cmap,
                      edge_color=hv_dim('source'),
                      labels='screen_name',
                      node_color=hv_dim('party'),
                      edge_hover_line_color='cyan',
                      node_hover_fill_color='cyan',
                      height=700,
                      width=700))

    return chord
Example #3
def modify_doc(doc, mytabs):

	start, end = 1, 20
	samples_count = 5
	slider = Slider(start=start, end=end, value=start, step=1, title="Counts")
	select = Select(title="Count", value="aux", options=["box", "pack", "image", "user"])

	renderer = hv.renderer('bokeh')##.instance(mode='server')
	hv.extension('bokeh')
	hv.output(size=200)
	links = pd.DataFrame(data['links'])
	print(links.head(3))
	nodes = hv.Dataset(pd.DataFrame(data['nodes']), 'index')
	chord = hv.Chord((links, nodes)).select(value=(samples_count, None))
	chord.opts(opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(), labels='name', node_color=dim('index').str()))    	
	# Create HoloViews plot and attach the document
	hvplot = renderer.get_plot(chord, doc)
	
	def slider_update(attrname, old, new):
		# Notify the HoloViews stream of the slider update 
		print ("update received")
		samples_count = new
		
		links = pd.DataFrame(data['links'])
		print(links.head(3))
		nodes = hv.Dataset(pd.DataFrame(data['nodes']), 'index')
		chord = hv.Chord((links, nodes)).select(value=(samples_count, None))
		chord.opts(opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(), labels='name', node_color=dim('index').str()))    	
		# Create HoloViews plot and attach the document
		hvplot = renderer.get_plot(chord, doc)
		tab3 = Panel(child=row(slider, hvplot.state), title="Chord Plot")

		mytabs.append(tab3)
		views = Tabs(tabs = mytabs)
		layout=row(views)
		doc.add_root(layout)
	
		return doc

	slider.on_change('value', slider_update)

	
	def select_update(attrname, old, new):
		# Notify the HoloViews stream of the slider update 
		print ("update received. Old: {} New: {}".format(old, new))
		

	select.on_change('value', select_update)		

	# Combine the holoviews plot and widgets in a layout

	tab3 = Panel(child=row(slider, hvplot.state), title="Chord Plot")

	mytabs.append(tab3)
	views = Tabs(tabs = mytabs)
	layout=row(views)
	doc.add_root(layout)
	
	return doc
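
One hypothetical way to serve modify_doc as a standalone Bokeh application (the port and the empty mytabs list are illustrative, and the module-level data dict and Bokeh/HoloViews imports the function relies on are assumed to be in place):

from functools import partial
from bokeh.application import Application
from bokeh.application.handlers.function import FunctionHandler
from bokeh.server.server import Server

mytabs = []
app = Application(FunctionHandler(partial(modify_doc, mytabs=mytabs)))
server = Server({'/': app}, port=5006)
server.start()
server.io_loop.add_callback(server.show, "/")  # open the app in a browser tab
server.io_loop.start()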
Example #4
def plot_connection_graph():
	lst = read_csv_list("results_ip_comp.csv")[1:1000]
	lst = [x for x in lst if float(x[-1]) < float(10000)]
	links = pd.DataFrame({ 'source': [x[0] for x in lst], 'target': [x[1] for x in lst]})
	chord = hv.Chord(links).select(value=(5, None))
	chord.opts(
		opts.Chord(cmap='Category20', edge_cmap='Category20',
			edge_color=dim('source').str(), labels='name',
			node_color=dim('index').str()))
	return chord
Example #5
def chordDiagram(person_id, df_enron):
    import json
    import holoviews as hv
    from holoviews import opts
    from bokeh.embed import json_item

    hv.extension('bokeh')

    df_chord = df_enron.sort_values('fromJobtitle')
    df_chord['index'] = df_chord.index

    df_links = df_chord.groupby(['fromId', 'toId']).agg({
        'date': 'count',
        'sentiment': 'mean'
    })
    df_links = df_links.reset_index()[['fromId', 'toId', 'date', 'sentiment']]
    df_links.columns = ['source', 'target', 'value', 'sentiment']

    x = df_chord[['fromId', 'fromJobtitle']].drop_duplicates()
    x.columns = ['source', 'fromJobtitle']

    df_links = pd.merge(df_links, x, on="source")
    df_links.drop_duplicates(subset='source')

    df_nodes = df_chord[['fromId', 'fromEmail', 'fromJobtitle'
                         ]].drop_duplicates().reset_index(drop=True)
    df_nodes.columns = ['index', 'name', 'group']
    df_nodes.sort_values('name')
    y = df_chord[['fromId',
                  'toId']].drop_duplicates().groupby(['fromId'
                                                      ]).count().reset_index()
    y.columns = ['index', 'size']
    df_nodes = pd.merge(df_nodes, y, on='index')
    df_nodes['size'] = df_nodes['size'] / 3 + 8

    nodes = hv.Dataset(df_nodes, 'index')
    edge_df = df_links

    import seaborn as sns  # also improves the look of plots
    sns.set()  # set Seaborn defaults

    chord = hv.Chord((df_links, nodes)).select(value=(5, None))
    chord.opts(
        opts.Chord(cmap='Category20',
                   edge_cmap='Category20',
                   edge_color='sentiment',
                   labels='name',
                   node_color='group',
                   edge_alpha=0.8,
                   edge_line_width=1.5))

    final_chord = chord.select(index=person_id)

    plot = hv.render(final_chord, backend='bokeh')
    item_text = json.dumps(json_item(plot))
    return item_text
Example #6
def plot_chord_graph(df, col_idx, sep="; ", height=800, width=800, top_n=None, cmap='glasbey_light'):
    """
    Plots a chord diagram of co-occurring categories in a column
    Reference: https://holoviews.org/gallery/demos/bokeh/route_chord.html

    :param df: dataframe to analyse
    :param col_idx: The column name
    :param sep: Separator used to split the column value
    :param height: height of the final image
    :param width: width of the final image
    :param top_n: Plot only the top n nodes (optional)
    :param cmap: Colour scheme for the graph
    :return: The filtered holoviews Chord element
    """
    plot_df = df[[col_idx]].dropna()
    plot_df[col_idx] = plot_df[col_idx].str.split(sep)

    # Get the nodes and node weights
    nodes = [x for l in plot_df[col_idx] for x in l]
    node_wts = Counter(nodes)
    nodes_df = pd.DataFrame({'Key': list(node_wts.keys()),
                             'Count': list(node_wts.values())})
    nodes_df.sort_values('Count', inplace=True, ascending=False)
    nodes_df['ID'] = [i for i in range(0, nodes_df.shape[0])]

    nodes = hv.Dataset(nodes_df, 'ID', 'Key')

    # Get the edges
    edges = plot_df[col_idx].apply(lambda x: [(*sorted(c),) for c in combinations(x, 2)])
    edges = [edge for row in edges for edge in row]
    edge_wts = Counter(edges)

    edges_df = pd.DataFrame({'Source_Dest': list(edge_wts.keys()),
                             'Count': list(edge_wts.values())})
    edges_df.sort_values('Count', inplace=True, ascending=False)
    edges_df[['Source', 'Dest']] = pd.DataFrame(edges_df['Source_Dest'].tolist(), index=edges_df.index)

    edges_df = edges_df.merge(nodes_df[['Key', 'ID']], left_on=["Source"], right_on=["Key"])
    edges_df = edges_df.merge(nodes_df[['Key', 'ID']], left_on=["Dest"], right_on=["Key"])
    edges_df.rename(columns={'ID_x': 'Source_ID', 'ID_y': 'Dest_ID'}, inplace=True)

    chord = hv.Chord((edges_df, nodes), ['Source_ID', 'Dest_ID'], ['Count'])

    if top_n:
        most_used_lang = chord.select(ID=list(nodes_df.iloc[:top_n]['ID']), selection_mode='nodes')
    else:
        most_used_lang = chord.select(ID=nodes_df['ID'].tolist(), selection_mode='nodes')

    most_used_lang.opts(
        opts.Chord(cmap=cmap, edge_color=dim('Source_ID').str(),
                   height=height, width=width, labels='Key', node_color=dim('ID').str())
    )

    return most_used_lang
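
A hypothetical usage sketch (the toy dataframe and the "genres" column are illustrative; the function only assumes a column of separator-delimited categories and the module-level imports it relies on, such as Counter, combinations, hv, opts and dim):

import pandas as pd
import holoviews as hv
hv.extension("bokeh")

movies = pd.DataFrame({
    "genres": ["Drama; Comedy", "Drama; Thriller", "Comedy; Thriller; Drama"]
})
chord = plot_chord_graph(movies, "genres", sep="; ", top_n=3)
hv.save(chord, "genre_chord.html")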
Example #7
	def chord(data):
		hv.extension('bokeh')
		renderer=hv.renderer('bokeh')
		hv.output(size=230)
		links=pd.DataFrame(data['links'])
		hv.Chord(links)
		nodes=hv.Dataset(pd.DataFrame(data['nodes']), 'index')
		chord=hv.Chord((links, nodes)).select(value=(10, None))
		chord.opts(
			opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(),
			           labels='name', node_color=dim('index').str()))
		bokeh_plot=renderer.get_plot(chord).state
		html=file_html(bokeh_plot,CDN,"my plot")
		return html 
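
A hypothetical sketch of the input layout this helper expects (node and link records are illustrative; each node needs an index matching the link source/target values plus a name for the labels, and pd, hv, opts, dim, file_html and CDN are assumed to be imported at module level):

data = {
    "nodes": [
        {"index": 0, "name": "Alice", "group": 1},
        {"index": 1, "name": "Bob", "group": 1},
        {"index": 2, "name": "Carol", "group": 2},
    ],
    "links": [
        {"source": 0, "target": 1, "value": 12},
        {"source": 1, "target": 2, "value": 25},
        {"source": 0, "target": 2, "value": 18},
    ],
}
html = chord(data)  # embeddable HTML string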
Example #8
def make_chords(dict_df, type_IO='_PhysUse'):
    hv.extension('bokeh')
    hv.output(size=250)
    for key_dict, links in dict_df.items():
        # node_d is assumed to be a module-level list of node records
        nodes = hv.Dataset(pd.DataFrame(node_d), 'index')
        chord = hv.Chord((links, nodes)).select(value=(5, None))
        chord.opts(
            opts.Chord(cmap='Category20',
                       edge_cmap='Category20',
                       edge_color=dim('source').str(),
                       labels='name',
                       node_color=dim('index').str()))
        hv.save(chord, 'chord_' + str(key_dict) + str(type_IO) + '.html')
Example #9
def plot_network(all_transactions):
    hv.extension('bokeh')
    hv.output(size=500)
    links = pd.DataFrame(generate_links(all_transactions))
    print(links)
    hv.Chord(links)
    nodes = hv.Dataset(pd.DataFrame(generate_nodes(all_transactions)), 'index')
    nodes.data.head()
    chord = hv.Chord((links, nodes)).select(value=(1, None))
    chord.opts(
        opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(),
                   labels='name', node_color=dim('index').str()))
    hv.save(chord, 'image.html')
    print("Network analysis complete, saved as image.html")
Example #10
    def ChordDiagram(self):
        # Step 1 Get Data
        self.sharedNeos = self.GetShared()
        self.matchedNeos = self.GetMatchedNeos()

        hv.output(size=200)

        source = []
        target = []
        value = []
        for i, sam in enumerate(self.sharedNeos):
            for pair in self.sharedNeos[sam]:
                source.append(sam + "_" + pair.split(',')[0])
                target.append(sam + "_" + pair.split(',')[1])
                value.append(self.sharedNeos[sam][pair])

        for matched in self.matchedNeos:
            source.append(matched.split(',')[0])
            target.append(matched.split(',')[1])
            value.append(self.matchedNeos[matched])

        links = pd.DataFrame({
            'source': source,
            'target': target,
            'value': value
        })

        chord = hv.Chord(links)

        # chord = hv.Chord((links, nodes)).select(value=(5, None))
        chord.opts(
            opts.Chord(cmap='Category20',
                       edge_cmap='Category20',
                       labels='index',
                       node_color=dim('index').str()))

        p = hv.render(chord)

        select = Select(title="Option:",
                        value="foo",
                        options=["foo", "bar", "baz", "quux"])

        return (p, select)
Example #12
def plot_chord(predictions, filename):
    to_use = predictions.copy()
    for n, row in predictions.iterrows():
        if row.MLSynergy_score < 0:
            to_use.at[n, "MLSynergy_score"] = row.MLSynergy_score * -1
            to_use.at[n, "Interaction"] = "Synergy"
        else:
            to_use.at[n, "Interaction"] = "Antagony"
    hv.extension('bokeh')
    hv.output(size=200)
    to_use2 = to_use[to_use.NumbDrugs == 2]
    links = to_use2[["Drug1", "Drug2", "MLSynergy_score", "Interaction"]]
    drugs = list(links["Drug1"].unique()) + list(links["Drug2"].unique())
    nodes = hv.Dataset(drugs, 'Drug')
    chord = hv.Chord((links, nodes)).select(value=(1, None))
    chord.opts(opts.Chord(cmap='Rainbow', edge_cmap='Rainbow',\
                          edge_color=dim('Interaction').str(), labels='Drug',\
                          node_color=dim('Drug').str()))
    output_file(filename)
    show(hv.render(chord))
    return to_use2
Example #13
        def chord_diagram(self, launch):
            try:

                def normalize_value(x, total):
                    x = int((x / total) * 1000)
                    if x <= 0:
                        return 1
                    return x

                df = self.df.copy()

                # --------------  nodes
                data = {}
                data['nodes'] = []
                source_list = df['milestone_owner'].tolist()
                names = list(set(source_list))

                person_type_dict = dict(zip(df.milestone_owner, df.type))
                type_dict = {}
                types = list(set(df['type'].tolist()))
                name_dict = {}
                for idx, name in enumerate(names):
                    name_dict[name] = idx

                for idx, name in enumerate(names):
                    type_tmp = person_type_dict[name]
                    index = name_dict[name]
                    data['nodes'].append({
                        'OwnerID': index,
                        'index': idx,
                        'Type': type_tmp
                    })

                nodes = hv.Dataset(pd.DataFrame(data['nodes']), 'index')

                # --------- make the links

                data['links'] = []

                for idx, row in df.iterrows():
                    src = name_dict[row['project_owner']]
                    tgt = name_dict[row['milestone_owner']]
                    val = row['remuneration']
                    data['links'].append({
                        'source': src,
                        'target': tgt,
                        'value': val
                    })

                links = pd.DataFrame(data['links'])
                # get the individual links
                links = links.groupby(['source', 'target'])['value'].sum()
                links = links.reset_index()
                total = links['value'].sum()
                links['value'] = links['value'].apply(
                    lambda x: normalize_value(x, total))

                # filter for top percentile
                quantile_val = links['value'].quantile(
                    self.chord_data['percentile_threshold'])
                links = links[links['value'] >= quantile_val]
                #logger.warning('after quantile filter:%s',len(links))

                chord_ = hv.Chord((links, nodes), ['source', 'target'],
                                  ['value'])
                chord_.opts(
                    opts.Chord(cmap='Category20',
                               edge_cmap='Category20',
                               edge_color=dim('source').str(),
                               labels='Type',
                               node_color=dim('index').str(),
                               width=1000,
                               height=1000))

                return chord_

            except Exception:
                logger.error('chord diagram', exc_info=True)
Example #14
# Specify the plot renderer to use
hv.extension('bokeh')
hv.output(size=300)

# Chord diagram with interactive components
edgeList = edges[['Source', 'Target', 'weight']]
# Within the holoviews Dataset object we define kdims and vdims
# Kdims are the independent variables, which is Id in this example
# Vdims are the dependent variables, cent_value and rank_value
# By defining these here we can use them when creating the graph
nodeDS = hv.Dataset(nodes_extended, 'Id', ['cent_value', 'rank_value'])

# Coloured interactive chord diagram with node size determined by Vdims
kwargs = dict(width=300, height=300, xaxis=None, yaxis=None)
opts.defaults(opts.Nodes(**kwargs), opts.Graph(**kwargs))

graph = hv.Graph((edgeList, nodeDS), label='GoT season 1')
graph.opts(cmap='Category20',
           edge_cmap='Category20',
           node_size='cent_value',
           edge_line_width=1,
           node_color=dim('Id').str(),
           edge_color=dim('Source').str())
graph.opts(
    opts.Chord(inspection_policy='nodes',
               tools=['hover'],
               edge_hover_line_color='green',
               node_hover_fill_color='red'))

hv.save(graph, 'node_size_chord.html')
Example #15
def hv_chord(contrast, frequency, threshold, stats, re_order_ind, label_names, des, freq_vect):

    """
    Makes a HoloViews/Bokeh chord diagram
    :param contrast:
        Which contrast in the glm data are we looking at
    :param frequency:
        The index of the frequency in the freq_vect we are looking at
    :param threshold:
        The percentile threshold for plotting (so the plot isn't messy)
    :param stats:
        The stats array we are plotting from
    :param re_order_ind:
        The indices for re-ordering the data into the Y-order of the parcels in the brain
    :param label_names:
        The names for each parcel
    :param des:
        The design matrix from the GLM
    :param freq_vect:
        The frequency vector containing the actual frequency of each frequency index
    :return:
    """
    dtypes = np.dtype([
        ('source', int),
        ('target', int),
        ('value', int),
    ])

    data = np.empty(0, dtype=dtypes)
    links = pd.DataFrame(data)

    square = stats[contrast,:,:,frequency]


    #square = stats[contrast,:,:,:].sum(axis=2)
    thresh_mask = square > np.percentile(square, threshold)
    square[~thresh_mask] = 0

    #reorder
    X_sorted = np.copy(square)
    # Sort along first dim
    X_sorted = X_sorted[re_order_ind,:]
    # Sort along second dim
    X_sorted = X_sorted[:,re_order_ind]

    labels_sorted = np.array(label_names)[re_order_ind]
    # loop through Y axis of matrix
    counter = 0
    for i in range(X_sorted.shape[0]):
        for ii in range(X_sorted.shape[1]):
            links.loc[counter] = [i, ii, int(X_sorted[i,ii])]
            counter +=1

    # make label index
    dtypes = np.dtype([
        ('name', object),
        ('group', int),
    ])

    data = np.empty(0, dtype=dtypes)
    nodes = pd.DataFrame(data)

    for i in range(X_sorted.shape[0]):
        nodes.loc[i] = [labels_sorted[i], 1]

    graph = hv.Chord((links, hv.Dataset(nodes, 'index')))
    graph.opts(
        opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(),
                   labels='name', node_color=dim('index').str())
    )
    graph.relabel('Directed Graph').opts(directed=True)
    graph.opts(title=f'{des.contrast_names[contrast]} Partial Directed Coherence @ {int(freq_vect[frequency])}Hz')
    return graph
Example #16
def plot_connectogram(connectivity_matrix,
                      atlas_labels,
                      atlas_indices,
                      threshold=None,
                      chord_type=int,
                      dst_dir=None,
                      filename=None):
    '''Plot a connectivity matrix as a connectogram.
    

    Parameters
    ----------
    connectivity_matrix : np.array
        A symmetric connectivity matrix.
    atlas_labels : pd.Series or list
        A list-like object providing names of each atlas region.
    atlas_indices : pd.Series or list
        A list-like object providing indices of each atlas region.
    threshold : float or int, optional
        Apply a threshold to the connectivity matrix before plotting. Only connectivity
        values that are greater than or equal to this threshold are visualized.
    chord_type : int or float, optional
        Convert the connectivity values to float or int type. If the weight values 
        are integers, they define the number of chords to 
        be drawn between the source and target nodes directly. If the weights 
        are floating point values, they are normalized to a default of 500 chords, 
        which are divided up among the edges. Any non-zero weight will be assigned 
        at least one chord. The default is int.
    dst_dir : str, optional
        Name of the output directory. The default is None.
    filename : str, optional
        Name of the file (must be provided including the extension).
        The default is None.


    Returns
    -------
    connectogram_plot : holoviews.element.graphs.Chord
        The connectogram plot object.

    '''

    # copy matrix
    connectivity_matrix = connectivity_matrix.copy()

    # set lower triangle to NaN (since matrix is symmetric we want to remove duplicates)
    il = np.tril_indices(len(connectivity_matrix))
    connectivity_matrix[il] = np.nan

    # convert to pd.DataFrame for further processing
    connectivity_matrix_df = pd.DataFrame(data=connectivity_matrix,
                                          columns=atlas_indices,
                                          index=atlas_indices)

    # Remove any custom index name so that stacking yields the default column layout
    if connectivity_matrix_df.index.name:
        connectivity_matrix_df.index.name = None

    # stack connectivity_matrix
    connectivity_matrix_stacked = connectivity_matrix_df.stack().reset_index()
    connectivity_matrix_stacked.columns = ['source', 'target', 'value']

    if chord_type == int:
        connectivity_matrix_stacked = connectivity_matrix_stacked.astype(int)

    # reduce to only connections that are not 0
    connectivity_matrix_stacked = connectivity_matrix_stacked.loc[
        connectivity_matrix_stacked['value'] != 0, :]

    # Optional: reduce to only connections >= threshold
    if threshold:
        connectivity_matrix_stacked = connectivity_matrix_stacked.loc[
            connectivity_matrix_stacked['value'] >= threshold, :]

    # add node infos and show only nodes that also have a connection after subsetting to
    # connections that are not zero and (optionally) connections that pass the specified threshold
    atlas_df = pd.DataFrame({
        'region_id': atlas_indices,
        'label': atlas_labels
    })
    nodes_to_show = np.unique(connectivity_matrix_stacked[['source',
                                                           'target']].values)
    atlas_df = atlas_df.loc[atlas_df['region_id'].isin(nodes_to_show)]
    nodes = hv.Dataset(atlas_df, 'region_id', 'label')

    # create plot
    connectogram_plot = hv.Chord((connectivity_matrix_stacked, nodes),
                                 ['source', 'target'], ['value'])
    connectogram_plot.opts(
        opts.Chord(cmap='Category20',
                   edge_cmap='Category20',
                   edge_color=dim('source').str(),
                   node_color=dim('region_id').str(),
                   labels='label'))

    # save plot
    if dst_dir:
        if not filename:
            raise ValueError('Please provide a filename')

        dst_path = dst_dir + filename
        hv.save(connectogram_plot, dst_path)

    # FIXME: this doesn't work for me in Spyder
    show(hv.render(connectogram_plot))

    return connectogram_plot
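
A hypothetical usage sketch with a toy 3-region matrix (region names, indices and values are illustrative; pd, hv, opts, dim and bokeh's show are assumed to be imported at module level):

import numpy as np

toy_matrix = np.array([[0.0, 0.8, 0.1],
                       [0.8, 0.0, 0.5],
                       [0.1, 0.5, 0.0]])
plot_connectogram(toy_matrix,
                  atlas_labels=['Region A', 'Region B', 'Region C'],
                  atlas_indices=[1, 2, 3],
                  threshold=0.3,
                  chord_type=float)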
Example #17
nodos = [{
    "name": "Adenine",
    "index": 1
}, {
    "name": "Cytosine",
    "index": 2
}, {
    "name": "Guanine",
    "index": 3
}, {
    "name": "Thymine",
    "index": 4
}]
nodes = hv.Dataset(pd.DataFrame(nodos), 'index')

# generate the diagram with given options
# these options create a diagram with labels, nodes, and edges
# comment out these lines if you want to make an all-black diagram
#chord = hv.Chord((links, nodes))
#chord.opts(
#    opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(),
#               labels="name", node_color=dim('index').str()))

# use these options to create a diagram in black with no labels, no nodes, and no edges
# these options create a more artsy diagram
# comment out these lines if you want to keep the out-of-the-box options
chord = hv.Chord(links)
chord.opts(
    opts.Chord(edge_color="black",
               node_color="black",
               node_size=1,
               edge_visible=False))
Example #18
def chord_diagram(df_flow_MN, n_cycles, dir_path):
    '''
    Function to plot chord diagram for flows across industry sectors
    '''
    df_flow_MN = df_flow_MN.loc[df_flow_MN['Option'] == 'Industrial']
    df_flow_MN = df_flow_MN[['Cycle', 'Generator Industry Sector',
                             'Flow transferred', 'RETDF Industry Sector',
                             'Recycled flow', 'Industry sector']]

    Flows = {'waste': {'Generator Industry Sector': 'source',
                       'RETDF Industry Sector': 'target',
                       'Flow transferred': 'value'},
             'recycled': {'RETDF Industry Sector': 'source',
                          'Industry sector': 'target',
                          'Recycled flow': 'value'}}

    df_links = pd.DataFrame()
    for Flow, Link in Flows.items():

        cols = list(Link.keys())
        df_links_aux = df_flow_MN[['Cycle'] + cols]
        df_links_aux = df_links_aux.groupby(['Cycle'] + cols[0:2],
                                            as_index=False).sum()
        df_links_aux.drop(columns='Cycle', inplace=True)
        df_links_aux = df_links_aux.groupby(cols[0:2], as_index=False).sum()
        df_links_aux[cols[2]] = df_links_aux[cols[2]]/n_cycles
        df_links_aux['flow'] = Flow
        df_links_aux.rename(columns=Link, inplace=True)
        if Flow == 'waste':
            # 1 metric ton/yr
            df_links_aux = df_links_aux[df_links_aux['value'] >= 1000]
        df_links = pd.concat([df_links, df_links_aux],
                             ignore_index=True,
                             sort=True, axis=0)
    df_links = df_links.loc[df_links['source'] != df_links['target']]
    Nodes = set(df_links['source'].unique().tolist()
                + df_links['target'].unique().tolist())
    Nodes = {node: i for i, node in enumerate(Nodes)}
    df_links = df_links.replace({'source': Nodes,
                                 'target': Nodes})

    df_nodes = pd.DataFrame({'index': [idx for idx in Nodes.values()],
                             'name sector': [name for name in Nodes.keys()]})
    df_nodes['name'] = df_nodes['index'].apply(lambda x: f'Sector {x+1}')

    for Flow in ['waste', 'recycled']:
        try:
            df_links_plot = df_links.loc[df_links['flow'] == Flow,
                                         ['source', 'target', 'value']]
            sources = df_links_plot['source'].unique().tolist()
            search = df_links_plot.loc[~df_links_plot['target']
                                       .isin(sources), 'target'].unique().tolist()
            for s in search:
                df_links_plot = pd.concat([df_links_plot,
                                           pd.DataFrame({'source': [s],
                                                         'target': [s],
                                                         'value': [10**-50]})],
                                          ignore_index=True,
                                          sort=True, axis=0)
            hv.Chord(df_links_plot)
            nodes = hv.Dataset(df_nodes, 'index')

            chord = hv.Chord((df_links_plot, nodes)).select(value=(5, None))
            chord.opts(
                       opts.Chord(cmap='Category20', edge_cmap='Category20',
                                  edge_color=dim('source').str(),
                                  labels='name', node_color=dim('index').str()))

            df_nodes.to_csv(f'{dir_path}/chord_{Flow}.csv', sep=',', index=False)
            hv.save(chord, f'{dir_path}/chord_{Flow}.pdf', fmt='pdf')
        except ValueError:

            print(f'There are not records for {Flow} activities')
Example #19
                        'source': edge[0],
                        'target': edge[1],
                        'weight': 1,
                        'node': node[0],
                        'package': node[1]['package'],
                    })
                except KeyError:
                    #takes care of base module imports
                    pass
        df = pd.DataFrame(graph_data)
        return df


if __name__ == '__main__':
    test = ImportGraph(
        directory=Path('/home/dal/PycharmProjects/pyjanitor_fork/janitor'))
    df = test.output_graph()
    hv.extension('bokeh')
    defaults = dict(width=1000, height=1000, padding=0.1)
    chord = hv.Chord((df[['source', 'target',
                          'weight']], df[['node', 'package']]))
    chord.opts(
        opts.Chord(cmap='Category20',
                   edge_cmap='Category20',
                   edge_color=dim('source').str(),
                   labels='node',
                   node_color=dim('package').str(),
                   width=1000,
                   height=1000))
    hv.save(chord, 'test.html')
Example #20
def hv_generator(ontology_id_list):
    try:
        mentions = {}
        for selectedID in ontology_id_list:
            if selectedID in ontoterminology.keys():
                # print("ontoterminology selectedID 'NAME'", ontoterminology[selectedID]['NAME'])
                # print("set:  ", set(ontoterminology[selectedID]['PMID']))
                mentions[ontoterminology[selectedID]['NAME']] = set(
                    ontoterminology[selectedID]['PMID'])
                # print("got one: ", mentions[ontoterminology[selectedID]['NAME']])
            else:
                # print("No mentions found for ",selectedID)
                pass
        # print("loaded mentions", mentions)
        chn_list = []
        for source in mentions:
            # print("plain source: ", source)
            for target in mentions:
                if source.strip() == "" or target.strip() == "":
                    # print("blank source or target")
                    pass
                elif source.strip() == target.strip():
                    # print("intersection: ", source.strip())
                    pass
                else:
                    intersection = mentions[source].intersection(
                        mentions[target])
                    if len(intersection) > 0:
                        chn = {
                            "source": source,
                            "target": target,
                            "PMID": len(intersection)
                        }
                        #inverse duplicate checking here:
                        add_item = True
                        for k in chn_list:
                            if source + target == k['target'] + k['source']:
                                add_item = False
                        if add_item:
                            chn_list.append(chn)
        print("finished checking for inverse duplicates..")
        # print("length of intersection list: ", len(chn_list))
        # print(chn_list)

        # Build the data table expected by the visualisation library
        links = pd.DataFrame.from_dict(chn_list)
        # pandas removed Series.append; concatenate the source and target columns instead
        node_names = pd.concat([links.source, links.target]).unique()
        # print(node_names)
        node_info = {
            "index": node_names,
            "name": node_names,
            "group": [1] * len(node_names)
        }
        # print(node_info)
        nodes = hv.Dataset(pd.DataFrame(node_info), 'index')
        nodes.data.head()

        chord = hv.Chord((links, nodes)).select(
            value=(0, None))  # value=5 - changing to 0 works for more?

        chord.opts(
            opts.Chord(cmap='Category20',
                       edge_cmap='Category20',
                       edge_color='source',
                       labels='name',
                       node_color='index'))
        renderer = hv.renderer('bokeh')
        hvplot = renderer.get_plot(chord)
        html = renderer.static_html(hvplot)
        return json.dumps(html)
    except Exception as e:
        print(e)
        traceback.print_exc()
        html_error_message = "<!doctype html><div><h4>ERROR CREATING TABLE - no associations found, or possibly some of the ID's were incorrect?</h4></div></html>"
        return (json.dumps(html_error_message))
Example #21
    def chordGraphByMajor(self,
                          coefficient=0.5,
                          pval=0.05,
                          outputName='majorGraph',
                          outputSize=200,
                          imageSize=300,
                          showGraph=True,
                          outputImage=True):
        """Creates a chord graph between available majors through averaging and filtering both correlation coefficients and P-values. Outputs to an html file, PNG file, and saves the underlying data by default.

    Note:
        The 'classDept' column as set by :obj:`defineWorkingColumns` must have been defined in your dataset to filter by major.

    Args:
        coefficient (:obj:`float`, optional): Minimum correlation coefficient to filter correlations by.
        pval (:obj:`float`, optional): Maximum P-value to filter correlations by. Defaults to 0.05 (a standard P-value limit used throughout the sciences)
        outputName (:obj:`str`, optional): First part of the outputted file names, e.g. fileName.csv, fileName.html, etc.
        outputSize (:obj:`int`, optional): Size (units unknown) of html graph to output. 200 by default.
        imageSize (:obj:`int`, optional): Size (units unknown) of image of the graph to output. 300 by default. Increase this if node labels are cut off.
        showGraph (:obj:`bool`, optional): Whether or not to open a browser and display the interactive graph that was created. Defaults to :obj:`True`.
        outputImage (:obj:`bool`, optional): Whether or not to export an image of the graph. Defaults to :obj:`True`. 
    
    """
        # M: The parameters should usually be changed when the function is called!!

        # M: initialized holoview of size outputSize
        hv.output(size=outputSize)

        # M:  creates a copy of df and sets course1 and course2 to the elements in the respective rows w
        #     substring index 0 to the first number, exclusive (if number is first, element would be empty)
        majorFiltered = self.df.copy()
        # M: added the makeMissingValuesNanInColumn so that none of the entries are empty
        # majorFiltered.removeNanInColumn('course1')
        # majorFiltered.removeNanInColumn('course2')
        majorFiltered['course1'] = majorFiltered['course1'].apply(
            lambda course: re.findall(r'\A\D+', course)[0])
        majorFiltered['course2'] = majorFiltered['course2'].apply(
            lambda course: re.findall(r'\A\D+', course)[0])

        # sets majors to the unique remaining tuples of course1
        majors = majorFiltered['course1'].unique().tolist()
        majors.sort()

        majorCorrelations = []
        usedMajors = []

        # M: Makes the data in corr, P-value, and #students attributes numeric
        majorFiltered['corr'] = pd.to_numeric(majorFiltered['corr'])
        majorFiltered['P-value'] = pd.to_numeric(majorFiltered['P-value'])
        majorFiltered['#students'] = pd.to_numeric(majorFiltered['#students'])

        count = 0
        # M: loops through unique majors in course 1(those w/o numerical beginning)
        for major in majors:
            # Adds 1 to count then prints the number of elements in majors
            count += 1
            print(str(count) + ' / ' + str(len(majors)) + ' majors')

            # M: sets filteredToMajor to the majorFiltered where course 1 column is equal to 'major' in the majors list
            filteredToMajor = majorFiltered.loc[majorFiltered['course1'] ==
                                                major]
            # M: sets connectedMajors to the unique values in course2 column
            connectedMajors = filteredToMajor['course2'].unique().tolist()

            # M: loops through the unique majors in course 2 (those w/o numerical beginning)
            for targetMajor in connectedMajors:
                # M: Sets filteredToMajorPair to the tuple(s) where course 1 is 'major' and course 2 is 'targetMajor'
                filteredToMajorPair = filteredToMajor.loc[
                    filteredToMajor['course2'] == targetMajor]
                # M: Finds means for corr, PVal, and Students
                avgCorr = int(filteredToMajorPair['corr'].mean() * 100)
                avgPVal = filteredToMajorPair['P-value'].mean()
                avgStudents = filteredToMajorPair['#students'].mean()

                # M: ensures no corr following the constraints are counted twice and adds it to the list of correlations
                if avgCorr > (coefficient *
                              100) and major != targetMajor and avgPVal < pval:
                    if (targetMajor, major) not in usedMajors:
                        usedMajors.append((major, targetMajor))
                        majorCorrelations.append((major, targetMajor, avgCorr,
                                                  avgPVal, avgStudents))

        # M: Tells us how many correlations found
        if len(majorCorrelations) == 0:
            print('Error: no valid correlations found.')
            return
        print(str(len(majorCorrelations)) + ' valid major correlations found.')

        # M: Sets output to majorCorrelations and sets the column names
        output = pd.DataFrame(majorCorrelations,
                              columns=('source', 'target', 'corr', 'P-value',
                                       '#students'))
        # M: Sets newMajors to have the unique sources and targets (by putting them in a set)
        newMajors = set(output['source'])
        newMajors.update(output['target'])
        # M: Sets sortedMajors to one list of sources and targets, all sorted
        sortedMajors = sorted(list(newMajors))

        # M: sets 'nodes' to be sortedMajors w/ column name 'name'
        nodes = pd.DataFrame(sortedMajors, columns=['name'])

        # M: added this to check the value of output source and target before the apply
        print("source before:", output['source'])
        print("target before:", output['target'])

        # M: output source and target are changed to numeric instead of string objects to represent the sources and targets
        output['source'] = output['source'].apply(
            lambda major: nodes.index[nodes['name'] == major][0])
        output['target'] = output['target'].apply(
            lambda major: nodes.index[nodes['name'] == major][0])

        # M: Added this to check what each value would be set to after the apply
        print("index:", nodes.index[nodes['name'] == major][0])
        print("source now:", output['source'])
        print("target now:", output['target'])
        print(output['source'].dtype)

        # M: constructs the chord graph
        output.to_csv(outputName + '.csv', index=False)
        hvNodes = hv.Dataset(nodes, 'index')
        chord = hv.Chord((output, hvNodes)).select(value=(5, None))
        chord.opts(
            opts.Chord(cmap='Category20',
                       edge_cmap='Category20',
                       edge_color=dim('source').str(),
                       labels='name',
                       node_color=dim('index').str()))
        graph = hv.render(chord)
        output_file(outDir + outputName + '.html', mode='inline')
        # M: Saves and shows graph if showGraph true
        save(graph)
        if showGraph:
            show(graph)
        chord.opts(toolbar=None)
        # M: changes size to imageSize then saves it to outDir +outputName + '.png'
        if outputImage:
            hv.output(size=imageSize)
            try:
                export_png(hv.render(chord),
                           filename=outDir + outputName + '.png')
            except RuntimeError:
                print(
                    "Runtime error in chordGraphByMajors in line with export_png(hv.render(chord), filename=outDir +outputName + '.png')"
                )
        # M: changes size to outputSize
        hv.output(size=outputSize)
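
A hypothetical call sketch (the instance name and argument values are illustrative; the instance's dataframe is assumed to already contain the 'course1', 'course2', 'corr', 'P-value' and '#students' columns the method expects, and the module-level outDir output path is assumed to be set):

analysis.chordGraphByMajor(coefficient=0.6,
                           pval=0.01,
                           outputName='strongMajorLinks',
                           showGraph=False,
                           outputImage=False)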
Example #22
PIOT_Z['group'] = PIOT_Z.index1.astype(str).str[0:2]
df = PIOT_Z[['index1', 'group']].copy().reset_index().drop(columns='index')
Key = pd.DataFrame(df.group.unique()).reset_index()
Key.columns = ['group', 'NAICS_2']
df_fin = df.merge(Key, left_on=['group'], right_on=['NAICS_2'])
df_fin = df_fin.drop(columns=['group_x', 'NAICS_2'])
df_fin.columns = ['index', 'group']
node_desc = df_fin.to_dict('records')

nodes = hv.Dataset(pd.DataFrame(node_desc), 'index')

chord = hv.Chord((links, nodes)).select(value=(5, None))
chord.opts(
    opts.Chord(cmap='Category20',
               edge_cmap='Category20',
               edge_color=dim('source').str(),
               labels='name',
               node_color=dim('index').str()))

hv.save(chord, 'chord_piotalt.html')
###### Do the same thing for the MIOT ##############

# MIOT = "SIOT_BEA2012AR_CxC.xlsx"

# Z_Monetary = pd.read_excel(MIOT, usecols = 'A: OP', skipfooter = 6, header = 0, index_col = 0)
# Z_Monetary = format_idx_col(Z_Monetary)

# tidy_MIOT = pd.melt(Z_Monetary.reset_index(), id_vars = 'index')
# Probs = tidy_MIOT[tidy_MIOT['value'] < 0].index
# tidy_MIOT = elim_nan(tidy_MIOT)
# tidier_MIOT = tidy_MIOT.copy()
Example #23
Each row of the links table describes one connection:
  - *source*: origin of the connection
  - *target*: destination of the connection
  - *value*: the strength of the connection
  - *sign*: whether the connection is positive (+1) or negative (-1)

def mat2chord(vec, t=0, cthresh=0.25):
    def mat2links(x, ids):
        links = []
        for i in range(x.shape[0]):
            for j in range(i):
                links.append({'source': ids[i], 'target': ids[j], 'value': np.abs(x[i, j]), 'sign': np.sign(x[i, j])})
        return pd.DataFrame(links)
    
    links = mat2links(tc.vec2mat(vec)[:, :, t], rois['ID'])
    chord = hv.Chord((links, hv.Dataset(rois, 'ID'))).select(value=(cthresh, None))
    chord.opts(
        opts.Chord(cmap='Category20', edge_cmap='Category20', edge_color=dim('source').str(), labels='Region', node_color=dim('ID').str())
    )
    return chord

Here's the first correlation matrix:

hmap = mat2chord(isfc, t=0)

# This is to render chord plot in jupyter-book
html_repr = file_html(pn.Column(hmap).get_root(), CDN)
IPython.display.HTML(html_repr)

Now let's create an interactive figure to display the dynamic network patterns, with a slider for controlling the timepoint:

def timecorr_explorer(x, cthresh=0.25):
    hv.output(max_frames=x.shape[0])
Example #24
def plot4():

    df_data = pd.read_csv(file, sep=';', header=0, index_col=False)
    # cache the processed edge list next to the input file
    if not Path(file[:-4] + '.out.csv').is_file():

        p = []
        d = []
        e = []
        f = []
        hold = df_data.shape[0]

        # loop that sets values first in lists for columns
        i = 0
        while i < hold:

            # Fromnames + delete row of names once listed
            b = list(df_data.columns.values)
            del b[0]
            a1 = len(b) - i
            a = list(a1 * (df_data.iloc[i, 0], ))
            del a[:1]

            # Tonames + delete names that are already linked
            p = b
            del p[:(i + 1)]

            # weights + delete weights that are already linked
            c = df_data.iloc[:, 1].tolist()
            del c[:(i + 1)]

            # remove people linked to themselves
            # (filter into a new list; removing while iterating skips elements)
            c = [ele for ele in c if ele != 1]

            e = list(e + a)
            d = list(d + p)
            f = list(f + c)

            i += 1

        # df from which the plot will be made
        df_plot = pd.DataFrame(columns=['from', 'to', 'weight'])

        # puts said lists in columns
        df_plot['from'] = e
        df_plot['to'] = d
        df_plot['weight'] = f

        # delete edges with weight 0
        df_plot = df_plot.loc[df_plot['weight'] != 0.0]

        df_plot.to_csv(file[:-4] + '.out.csv',
                       sep='\t',
                       encoding='utf-8',
                       index=False)
    else:
        df_plot = pd.read_csv(file[:-4] + '.out.csv',
                              sep='\t',
                              encoding='utf-8',
                              index_col=False)

    chord = hv.Chord(df_plot)
    chord.opts(
        opts.Chord(cmap='Category20',
                   edge_cmap='Category20',
                   edge_color=dim('weight').str(),
                   labels='name',
                   node_color=dim('index').str()))

    holder2 = chord
    renderer = hv.renderer('bokeh')
    m = renderer.get_plot(holder2).state

    return json.dumps(json_item(m))