def combine_two_heatmaps_xaxis(plot1, plot2, plotname):
    """
    Arrange the traces of two plots side by side in a single figure.

    The traces of ``plot1`` go into the left panel and those of ``plot2``
    into the right panel. Colorbar settings are written directly onto the
    traces of the input plots, so both inputs are modified.

    :param plot1: plot with ``data`` and ``layout`` entries (left panel)
    :param plot2: plot with ``data`` and ``layout`` entries (right panel)
    :param plotname: output file name handed to plotly_plot
    :return: None
    """
    fig = tools.make_subplots(rows=1, cols=2, print_grid=False)

    # (source plot, subplot column, colorbar attributes in assignment order)
    panels = [
        (plot1, 1, [('x', 0.4), ('thickness', 20)]),
        (plot2, 2, [('x', 0.95), ('xpad', 50), ('thickness', 20)]),
    ]
    for source, column, colorbar_settings in panels:
        for heatmap in source['data']:
            colorbar = heatmap['colorbar']
            for attribute, value in colorbar_settings:
                colorbar[attribute] = value
            fig.append_trace(heatmap, 1, column)

    layout = fig['layout']
    layout['title'] = ""

    # Axis settings are copied from the source plots first and then
    # overridden, so the overrides always win.
    left_x = layout['xaxis1']
    left_x.update(plot1['layout']['xaxis'])
    left_x['title'] = ""
    left_x['domain'] = [0, 0.4]

    left_y = layout['yaxis1']
    left_y.update(plot1['layout']['yaxis'])
    left_y['title'] = ""

    right_x = layout['xaxis2']
    right_x.update(plot2['layout']['xaxis'])
    right_x['domain'] = [0.55, 0.95]
    right_x['title'] = ""

    right_y = layout['yaxis2']
    right_y.update(plot2['layout']['yaxis'])
    right_y['side'] = 'right'
    right_y['scaleanchor'] = 'x2'
    right_y['title'] = ""

    layout['font']['size'] = 18
    layout['hovermode'] = 'closest'
    layout['margin']['t'] = 10

    plotly_plot(fig, filename=plotname, auto_open=False)
def plot_freq_abs_vs_distance(distances_ab, abs, seq_sep, distance_definition, plot_dir):
    """
    Plot, for every entry of ``abs``, the normalised frequency of distances
    per distance bin at one sequence separation.

    Bins are ``np.arange(2, 50, 0.5)``. Values falling into the last
    ``np.digitize`` bucket (at or beyond the final bin edge) are not counted.

    :param distances_ab: nested mapping ``distances_ab[seq_sep][ab]`` giving
        a sequence of distances
    :param abs: iterable of keys into ``distances_ab[seq_sep]``
    :param seq_sep: sequence separation key; also used in the axis title and
        in the output file name
    :param distance_definition: prefix of the output file name
    :param plot_dir: directory the HTML file is written to
    :return: None
    """
    bins = np.arange(2, 50, 0.5)
    nr_bins = len(bins)

    data = []
    for ab in abs:
        distances = distances_ab[seq_sep][ab]

        # The bin assignment does not depend on the bin being counted, so it
        # is computed once per ab instead of once per bin.
        bin_index = np.digitize(distances, bins)
        counts = np.array(
            [np.count_nonzero(bin_index == i) for i in range(nr_bins)])
        p_r_ab = counts / float(np.sum(counts))

        data.append(
            go.Scatter(
                x=bins,
                y=p_r_ab,
                mode='lines',
                name=str(ab) + "(" + str(len(distances)) + ")"
            )
        )

    layout = go.Layout(
        title="",
        xaxis=dict(title="distance bins"),
        yaxis=dict(title="frequency at seq sep " + str(seq_sep))
    )

    fig = go.Figure(data=data, layout=layout)
    plot_file = plot_dir + "/" + distance_definition + "_frequency_seqsep" + str(seq_sep) + ".html"
    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_freq_abs_vs_distance(distances_ab, abs, seq_sep, distance_definition, plot_dir):
    """
    Draw one line per entry of ``abs`` showing how often distances fall into
    each distance bin, normalised to sum to one.

    The bin edges are ``np.arange(2, 50, 0.5)``; distances at or beyond the
    last edge are left out of the counts.

    :param distances_ab: nested mapping ``distances_ab[seq_sep][ab]`` giving
        a sequence of distances
    :param abs: iterable of keys into ``distances_ab[seq_sep]``
    :param seq_sep: sequence separation key; also part of the y-axis title
        and of the output file name
    :param distance_definition: prefix of the output file name
    :param plot_dir: directory the HTML file is written to
    :return: None
    """
    bins = np.arange(2, 50, 0.5)

    def _bin_frequencies(values):
        # Assign every value to its bin once, then count per bin; the
        # original code repeated the np.digitize call for each bin.
        assigned = np.digitize(values, bins)
        counts = np.array([np.sum(assigned == i) for i in range(len(bins))])
        return counts / float(np.sum(counts))

    data = []
    for ab in abs:
        distances = distances_ab[seq_sep][ab]
        data.append(
            go.Scatter(x=bins,
                       y=_bin_frequencies(distances),
                       mode='lines',
                       name=str(ab) + "(" + str(len(distances)) + ")"))

    layout = go.Layout(title="",
                       xaxis=dict(title="distance bins"),
                       yaxis=dict(title="frequency at seq sep " + str(seq_sep)))

    fig = go.Figure(data=data, layout=layout)
    plot_file = plot_dir + "/" + distance_definition + "_frequency_seqsep" + str(
        seq_sep) + ".html"
    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_boxplot_correlation(pearson_r, proteins, plot_file):
    """
    Write a single box plot of Pearson correlation values.

    :param pearson_r: correlation values shown on the y axis
    :param proteins: hover text for the points, passed as the box ``text``
    :param plot_file: output file name handed to plotly_plot
    :return: None
    """
    box = go.Box(
        y=pearson_r,
        name="APC vs Entropy correction",
        showlegend=False,
        boxmean=False,
        # plotly spells this value in lower case
        boxpoints='outliers',
        text=proteins
    )

    layout = go.Layout(
        font=dict(size=24),
        margin=dict(t=10),
        yaxis=dict(range=[0, 1], title="Pearson correlation"),
        # figure size must be numeric, not a string
        width=500,
        height=400
    )

    plot = {"data": [box], "layout": layout}
    plotly_plot(plot, filename=plot_file, auto_open=False, show_link=False)
def plot_1d_coupling_profile(couplings_per_bin, plot_dir, ab):
    """
    Plot one density curve of couplings per distance bin.

    :param couplings_per_bin: maps a bin name to a dict with the entries
        'lower', 'upper' (used for the label) and 'couplings' (the values)
    :param plot_dir: directory the HTML file is written to
    :param ab: label inserted into the axis titles and the file name
    :return: None
    """
    # Sort once and reuse; .items() exists on Python 2 and 3 alike,
    # whereas .iteritems() is Python 2 only.
    bins_descending = sorted(couplings_per_bin.items(), reverse=True)

    group_labels = [
        str(bindict['lower']) + "Å < ΔCβ < " + str(bindict['upper']) + "Å"
        for _, bindict in bins_descending
    ]
    hist_data = [bindict['couplings'] for _, bindict in bins_descending]

    # average number of couplings per bin, rounded to the nearest hundred
    nr_datapoints = int(
        np.round(np.mean([len(x) for x in hist_data]), decimals=-2))

    fig = ff.create_distplot(hist_data,
                             group_labels,
                             show_hist=False,
                             show_rug=False)

    for trace in fig['data']:
        trace['line']['width'] = 2

    fig['layout']['font'] = dict(size=16)
    fig['layout']['xaxis']['title'] = "couplings w_ij(" + ab + ")"
    fig['layout']['xaxis']['range'] = [-1, 1]
    fig['layout']['yaxis']['title'] = "Distribution of couplings for " + ab
    fig['layout']['margin']['t'] = 10

    plot_name = plot_dir + "/1d_coupling_profile_" + ab + "_avgdatapoints" + str(
        nr_datapoints) + ".html"
    plotly_plot(fig, filename=plot_name, auto_open=False)
def plot_boxplot_correlation(pearson_r_pll, pearson_r_pcd, plot_file):
    """
    Write two box plots of Pearson correlations next to each other.

    :param pearson_r_pll: values for the "pseudo-likelihood" box
    :param pearson_r_pcd: values for the "persistent contrastive
        divergence" box
    :param plot_file: output file name handed to plotly_plot
    :return: None
    """
    labelled_values = [
        ("pseudo-likelihood", pearson_r_pll),
        ("persistent contrastive divergence", pearson_r_pcd),
    ]

    boxes = []
    for label, values in labelled_values:
        boxes.append(
            go.Box(y=values,
                   name=label,
                   showlegend=False,
                   boxmean=False,
                   boxpoints='outliers'))

    layout = go.Layout(
        title="Correlation between APC and Entropy Correction",
        font=dict(size=24),
        margin=dict(t=50),
        yaxis=dict(range=[0, 1], title="Pearson correlation"),
        width=900,
        height=450)

    plotly_plot({"data": boxes, "layout": layout},
                filename=plot_file,
                auto_open=False,
                show_link=False)
def plot_metrics(log_metric_dict, metric, plot_out):
    """
    Plot a metric against the iteration number, one line per run.

    Line colour and dash style are chosen from substrings of the run name.

    :param log_metric_dict: maps a run name to its sequence of metric values;
        the entry 'order' lists the run names in plotting order
    :param metric: y-axis title
    :param plot_out: output file name handed to plotly_plot
    :return: None
    """
    # Read the plotting order without removing it: popping the key changed
    # the caller's dict and made a second call on the same dict fail.
    order = log_metric_dict['order']

    # (substring of the run name, value). All rules are checked in this
    # order and a later match overrides an earlier one.
    color_rules = [
        ("1dv1A03", "rgb(153, 00, 00)"),
        ("1c5aA00", "rgb(65, 105, 225)"),
    ]
    dash_rules = [
        ("1e-2.opt", "dot"),
        ("1e-4.opt", "dash"),
        ("0.opt", "solid"),
    ]

    data = []
    for key in order:
        print(key)

        line = dict(width=4)
        for pattern, color in color_rules:
            if pattern in key:
                line['color'] = color
        for pattern, dash in dash_rules:
            if pattern in key:
                line['dash'] = dash

        values = log_metric_dict[key]
        data.append(
            go.Scatter(
                # list() keeps the x values a plain list on Python 3 as well
                x=list(range(1, len(values) + 1)),
                y=values,
                name=key,
                line=line
            )
        )

    layout = go.Layout(
        title="",
        margin=dict(t=10),
        xaxis=dict(
            range=[0, 2500],
            title="iterations"),
        yaxis=dict(
            range=[-1.5, 4],
            type="log",
            title=metric,
            exponentformat="e"
        ),
        font=dict(size=18)
    )

    fig = go.Figure(data=data, layout=layout)
    plotly_plot(fig, filename=plot_out, auto_open=False)
def plot_alignment(aa_counts_single, title, plot_file, freq=True):
    """
    Write a stacked bar chart of the amino acid distribution per position.

    :param aa_counts_single: 2-d array indexed as [position, amino acid];
        the first 20 amino acid columns are plotted
    :param title: plot title
    :param plot_file: output file name handed to plotly_plot
    :param freq: if True, divide the counts by the sum of the first row
        before plotting
    :return: None
    """
    Neff = np.sum(aa_counts_single[0, :])
    L = aa_counts_single.shape[0]

    if freq:
        # Divide out of place: the in-place "/=" overwrote the caller's
        # array and does not do a true division on integer counts.
        aa_counts_single = aa_counts_single / float(Neff)

    positions = list(range(1, L + 1))

    # one bar series per amino acid, stacked per position
    data = []
    for aa in range(20):
        data.append(
            go.Bar(x=positions,
                   y=aa_counts_single[:, aa].tolist(),
                   showlegend=True,
                   name=io.AMINO_ACIDS[aa]))

    layout = go.Layout(barmode='stack',
                       title=title,
                       xaxis=dict(title="Alignment Position"),
                       yaxis=dict(title="Amino Acid Distribution",
                                  exponentformat='e',
                                  # plotly spells this value in lower case
                                  showexponent='all'),
                       font=dict(size=18))

    plot = {'data': data, 'layout': layout}
    plotly_plot(plot, filename=plot_file, auto_open=False, link_text='')
def plot_progress(self):
    """
    Write the optimization log as a line plot, one trace per logged metric.

    Nothing happens when ``self.plotfile`` is None. All traces start out
    hidden ("legendonly") and can be switched on via the legend.

    :return: None
    """
    if self.plotfile is None:
        return

    protein = os.path.basename(self.plotfile).split(".")[0]
    title = "Optimization Log for {0} ".format(protein)
    title += self.title

    data = []
    for name, metric in self.optimization_log.items():
        data.append(
            go.Scatter(x=list(range(1, len(metric) + 1)),
                       y=metric,
                       mode='lines',
                       visible="legendonly",
                       name=name))

    # plotly spells the showexponent value in lower case
    axis_format = dict(exponentformat="e", showexponent='all')
    plot = {
        "data": data,
        "layout": go.Layout(title=title,
                            xaxis1=dict(title="iteration", **axis_format),
                            yaxis1=dict(title="metric", **axis_format),
                            font=dict(size=18),
                            titlefont=dict(size=14))
    }

    plotly_plot(plot, filename=self.plotfile, auto_open=False)
def plot_1d_coupling_profile(couplings_per_bin, plot_dir, ab):
    """
    Write a density plot of couplings with one curve per distance bin.

    :param couplings_per_bin: maps a bin name to a dict with the entries
        'lower', 'upper' (used for the label) and 'couplings' (the values)
    :param plot_dir: directory the HTML file is written to
    :param ab: label inserted into the axis titles and the file name
    :return: None
    """
    group_labels = []
    hist_data = []
    # .items() instead of the Python-2-only .iteritems(); the bins are
    # sorted a single time and walked in descending order.
    for _, bindict in sorted(couplings_per_bin.items(), reverse=True):
        group_labels.append(
            str(bindict['lower']) + "Å < ΔCβ < " + str(bindict['upper']) + "Å")
        hist_data.append(bindict['couplings'])

    # average number of couplings per bin, rounded to the nearest hundred
    mean_size = np.mean([len(x) for x in hist_data])
    nr_datapoints = int(np.round(mean_size, decimals=-2))

    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)

    for trace in fig['data']:
        trace['line']['width'] = 2

    layout = fig['layout']
    layout['font'] = dict(size=16)
    layout['xaxis']['title'] = "couplings w_ij(" + ab + ")"
    layout['xaxis']['range'] = [-1, 1]
    layout['yaxis']['title'] = "Distribution of couplings for " + ab
    layout['margin']['t'] = 10

    plot_name = plot_dir + "/1d_coupling_profile_" + ab + "_avgdatapoints" + str(nr_datapoints) + ".html"
    plotly_plot(fig, filename=plot_name, auto_open=False)
def combine_two_heatmaps_xaxis(plot1, plot2, plotname):
    """
    Put two plots into one figure with two panels in a single row.

    ``plot1`` fills the left panel and ``plot2`` the right one. The colorbar
    position and thickness are set on the traces of the given plots
    themselves, i.e. the inputs are changed.

    :param plot1: plot with ``data`` and ``layout`` entries (left panel)
    :param plot2: plot with ``data`` and ``layout`` entries (right panel)
    :param plotname: output file name handed to plotly_plot
    :return: None
    """
    figure = tools.make_subplots(rows=1, cols=2, print_grid=False)

    for heatmap in plot1['data']:
        bar = heatmap['colorbar']
        bar['x'] = 0.4
        bar['thickness'] = 20
        figure.append_trace(heatmap, 1, 1)

    for heatmap in plot2['data']:
        bar = heatmap['colorbar']
        bar['x'] = 0.95
        bar['xpad'] = 50
        bar['thickness'] = 20
        figure.append_trace(heatmap, 1, 2)

    layout = figure['layout']
    layout['title'] = ""

    # Take over the axis settings of the source plots and blank the axis
    # titles afterwards, so that a copied title never survives.
    axis_sources = (
        ('xaxis1', plot1['layout']['xaxis']),
        ('yaxis1', plot1['layout']['yaxis']),
        ('xaxis2', plot2['layout']['xaxis']),
        ('yaxis2', plot2['layout']['yaxis']),
    )
    for axis_key, source_axis in axis_sources:
        layout[axis_key].update(source_axis)
        layout[axis_key]['title'] = ""

    # panel placement, applied after the copy so it takes precedence
    layout['xaxis1']['domain'] = [0, 0.4]
    layout['xaxis2']['domain'] = [0.55, 0.95]
    layout['yaxis2']['side'] = 'right'
    layout['yaxis2']['scaleanchor'] = 'x2'

    layout['font']['size'] = 18
    layout['hovermode'] = 'closest'
    layout['margin']['t'] = 10

    plotly_plot(figure, filename=plotname, auto_open=False)
def write_ccmgen_benchmark_figure(fig, title, plot_file, height=400, width=400):
    """
    Restyle a benchmark figure in place and write it to ``plot_file``.

    Every trace name is cut down to the text after its last "-" and before
    the first "(" that follows.

    :param fig: figure with ``data`` and ``layout`` entries; modified in place
    :param title: plot title
    :param plot_file: output file name handed to plotly_plot
    :param height: figure height
    :param width: figure width
    :return: None
    """
    for trace in fig['data']:
        after_last_dash = trace['name'].rsplit("-", 1)[-1]
        trace['name'] = after_last_dash.partition("(")[0]

    layout = fig['layout']
    layout['font']['size'] = 18
    layout['hovermode'] = 'closest'
    layout['title'] = title

    margin = layout['margin']
    margin['b'] = 45
    margin['t'] = 50

    layout['legend'] = dict(orientation="v", x=0.65, y=1.0)
    layout['xaxis'] = dict(title="#predicted contacts / protein length")
    layout['yaxis'] = dict(title="mean precision over proteins",
                           range=[0, 0.8])

    layout['height'] = height
    layout['width'] = width

    plotly_plot(fig, filename=plot_file, auto_open=False, show_link=False)
def plot_runtime(plot_data, plot_file):
    """
    Write one box plot of runtimes per method on a logarithmic y axis.

    :param plot_data: maps a method name to its runtimes
    :param plot_file: output file name handed to plotly_plot
    :return: None
    """
    data = []
    for method, runtimes in plot_data.items():
        data.append(
            go.Box(y=runtimes,
                   boxmean=True,
                   # plotly spells this value in lower case
                   boxpoints='outliers',
                   name=method,
                   marker=dict(opacity=1),
                   hoverinfo='all',
                   orientation='v',
                   showlegend=False))

    # fixed tick positions; the labels show the plain numbers
    ticks = [1, 10, 100, 500, 1000, 5000, 10000]

    layout = go.Layout(yaxis=dict(title="runtime in min",
                                  type='log',
                                  exponentformat='none',
                                  showexponent='none',
                                  tickmode="array",
                                  tickvals=list(ticks),
                                  ticktext=list(ticks)),
                       font=dict(size=18),
                       width=800,
                       height=500,
                       margin=dict(t=10))

    plot = {"data": data, "layout": layout}
    plotly_plot(plot, filename=plot_file, auto_open=False, show_link=False)
def plot_boxplot_correlation(pearson_r, proteins, plot_file):
    """
    Write a box plot of Pearson correlation values to ``plot_file``.

    :param pearson_r: correlation values shown on the y axis
    :param proteins: hover text for the points, passed as the box ``text``
    :param plot_file: output file name handed to plotly_plot
    :return: None
    """
    data = [
        go.Box(y=pearson_r,
               name="APC vs Entropy correction",
               showlegend=False,
               boxmean=False,
               # lower case is the spelling plotly defines for this value
               boxpoints='outliers',
               text=proteins)
    ]

    plot = {
        "data": data,
        "layout": go.Layout(font=dict(size=24),
                            margin=dict(t=10),
                            yaxis=dict(range=[0, 1],
                                       title="Pearson correlation"),
                            # numbers instead of the former numeric strings
                            width=500,
                            height=400)
    }

    plotly_plot(plot, filename=plot_file, auto_open=False, show_link=False)
def plot_boxplot_scores(protein, method_1, method_2, braw_1, braw_2, plot_dir, l2norm=False, apc=False):
    """
    Write box plots comparing the residue pair scores of two methods.

    :param protein: protein name; used as the title and in the file name
    :param method_1: name of the first method (box label, file name)
    :param method_2: name of the second method (box label, file name)
    :param braw_1: object providing ``ncol`` and ``x_pair`` for method 1
    :param braw_2: object providing ``x_pair`` for method 2
    :param plot_dir: directory the HTML file is written to
    :param l2norm: if True, plot the matrices returned by
        ``b.compute_l2norm_from_braw`` instead of the raw ``x_pair`` entries
    :param apc: forwarded to ``b.compute_l2norm_from_braw``; only used
        together with ``l2norm``
    :return: None
    """
    L = braw_1.ncol
    rows, cols = np.triu_indices(L, k=1)

    title = protein
    plot_out = plot_dir + "/boxplot_for_" + protein + "_method1_" + method_1 + "_method2_" + method_2 + "_score.html"

    if l2norm:
        score_1 = b.compute_l2norm_from_braw(braw_1, apc)[rows, cols]
        score_2 = b.compute_l2norm_from_braw(braw_2, apc)[rows, cols]
        plot_out = plot_out.replace(".html", "_l2norm_apc" + str(apc) + ".html")
    else:
        # raw couplings of the first 20 states for every pair i < j
        score_1 = braw_1.x_pair[rows, cols, :20, :20].flatten()
        score_2 = braw_2.x_pair[rows, cols, :20, :20].flatten()

    data = [
        go.Box(y=scores,
               name=method,
               showlegend=False,
               boxmean='sd',
               boxpoints=False)
        for method, scores in ((method_1, score_1), (method_2, score_2))
    ]

    plot = {
        "data": data,
        "layout": go.Layout(
            title=title,
            font=dict(size=18),
            yaxis1=dict(
                title="score for residue pair",
                exponentformat="e",
                # plotly spells this value in lower case
                showexponent='all',
                scaleratio=1.0,
                scaleanchor='x'
            ),
            xaxis1=dict(
                exponentformat="e",
                showexponent='all',
                scaleratio=1.0,
                scaleanchor='y'
            )
        )
    }

    plotly_plot(plot, filename=plot_out, auto_open=False)
def plot_pll_vs_pcd_benchmark_figure(subplots, plot_dir, height=500, width=500):
    """
    Combine the PCD and pLL benchmark traces into one figure and write it.

    For each method an extra black trace is added that serves as the legend
    entry of the method; the actual traces are grouped under 'correction'.
    The traces inside ``subplots`` are renamed and restyled in place.

    :param subplots: mapping with the entries
        'persistent contrastive divergence' and
        'pseudo-likelihood maximization', each holding a ``data`` list of
        traces
    :param plot_dir: directory the HTML file is written to
    :param height: figure height
    :param width: figure width
    :return: path of the written HTML file
    """
    data = []

    # add PCD traces
    pcd_traces = subplots['persistent contrastive divergence']['data']

    # Deep copy: a shallow copy shares the nested 'line' settings with the
    # original trace, so colouring the legend entry black would also turn
    # the first real trace black.
    pcd_legend = copy.deepcopy(pcd_traces[0])
    pcd_legend['legendgroup'] = 'method'
    pcd_legend['name'] = 'PCD'
    pcd_legend['line']['color'] = 'black'
    data.append(pcd_legend)

    for trace in pcd_traces:
        trace['name'] = trace['name'].split("-")[-1].split("(")[0]
        trace['legendgroup'] = 'correction'
        data.append(trace)

    # add pLL traces
    pll_traces = subplots['pseudo-likelihood maximization']['data']

    pll_legend = copy.deepcopy(pll_traces[0])
    pll_legend['legendgroup'] = 'method'
    pll_legend['name'] = 'pLL'
    pll_legend['line']['color'] = 'black'
    pll_legend['line']['dash'] = 'dot'
    pll_legend['showlegend'] = True
    data.append(pll_legend)

    for trace in pll_traces:
        trace['name'] = trace['name'].split("-")[-1].split("(")[0]
        trace['legendgroup'] = 'correction'
        trace['showlegend'] = False
        trace['line']['dash'] = 'dot'
        data.append(trace)

    layout = go.Layout(
        font=dict(size=18),
        hovermode='closest',
        title="",
        margin=dict(t=10),
        legend=dict(orientation="v", x=1.01, y=1.0),
        yaxis=dict(title="Mean Precision over Proteins", range=[0, 1]),
        xaxis=dict(title="#predicted contacts / protein length"),
        height=height,
        width=width)

    fig = go.Figure(data=data, layout=layout)

    plot_file = plot_dir + "/" + "ccmgen_benchmark_figure_pll_vs_pcd.html"
    plotly_plot(fig, filename=plot_file, auto_open=False, show_link=False)

    return plot_file
def plot_pdb_uniprot_fct(data_dict, seq_dict, plot_dir=None):
    """
    Plot the total number of entries over time for several data sets.

    Every frame in ``seq_dict`` is drawn as a dotted line, the
    'PDB-Protein' frame of ``data_dict`` as a solid line. Both arguments
    are modified: the 'Date' columns are converted to datetimes and the
    filtered frames are stored in ``data_dict``.

    :param data_dict: mapping that must contain a 'PDB-Protein' data frame
        with the columns 'Date' (parsed with format "%Y") and 'Total'
    :param seq_dict: maps a name to a data frame with the columns 'Date'
        and 'Total'
    :param plot_dir: directory for "pdb_uniprot.html"; if None the plot
        dictionary is returned instead of being written
    :return: the plot dictionary if ``plot_dir`` is None, otherwise None
    """
    cutoff = '1996-01-01'

    data = []
    # .items() instead of the Python-2-only .iteritems()
    for name, df in seq_dict.items():
        df['Date'] = pd.to_datetime(df['Date'])
        # NOTE(review): the labels of the early rows are used as positions
        # into df.index, which presumably relies on a default integer
        # index -- confirm against the callers.
        data_dict[name] = df.drop(df.index[df[df['Date'] < cutoff].index])

        data.append(
            go.Scatter(
                x=data_dict[name].Date,
                y=data_dict[name].Total,
                showlegend=True,
                name=name,
                line=dict(
                    width=4,
                    dash='dot'
                )
            )
        )

    pdb = data_dict['PDB-Protein']
    pdb['Date'] = pd.to_datetime(pdb['Date'], format="%Y")
    pdb = pdb.drop(pdb.index[pdb[pdb['Date'] < cutoff].index])
    data_dict['PDB-Protein'] = pdb
    # NOTE(review): chained assignment kept as in the original; it sets the
    # date of the row labelled 0 to today -- confirm that this row is the
    # most recent year.
    pdb['Date'][0] = pd.to_datetime('today')

    data.append(
        go.Scatter(
            x=pdb.Date,
            y=pdb.Total,
            showlegend=True,
            name='PDB-Protein',
            line=dict(
                width=4
            )
        )
    )

    plot = {
        "data": data,
        "layout": go.Layout(
            legend=dict(x=.05, y=1.0),
            title="",
            xaxis=dict(
                title="Year",
                range=['01-01-1999', '01-01-2017']
            ),
            yaxis=dict(
                title="Total number of Entries",
                type="log"
            ),
            font=dict(size=18)
        )
    }

    if plot_dir is not None:
        plot_file = plot_dir + "/pdb_uniprot.html"
        plotly_plot(plot, filename=plot_file, auto_open=False)
    else:
        return plot
def plot_boxplot_correlation(stats_dict, method_1, method_2, keys_list, plot_dir):
    """
    Write box plots of comparative statistics, one box per requested key.

    The tuple-valued statistics are first split into separate columns
    ('Pearson r', 'Pearson pvalue', ...), which can then be selected through
    ``keys_list``.

    :param stats_dict: maps a protein to a dict with the entries 'pearson',
        'spearmanrho', 'kendalltau', 'kolmogorov-smirnov' (pairs),
        'linreg' (5-tuple) and 'Neff'
    :param method_1: first method name; part of the output file name
    :param method_2: second method name; part of the output file name
    :param keys_list: names of the columns to draw as boxes
    :param plot_dir: directory the HTML file is written to
    :return: None
    """
    df = pd.DataFrame(stats_dict).transpose()

    # (source column, column for the first value, column for the second)
    pair_columns = [
        ('pearson', 'Pearson r', 'Pearson pvalue'),
        ('spearmanrho', 'Spearman rho', 'Spearman pvalue'),
        ('kendalltau', 'Kendalls tau', 'Kendalls pvalue'),
    ]
    for source, first_name, second_name in pair_columns:
        pairs = df[source].tolist()
        df[first_name] = [first for first, _ in pairs]
        df[second_name] = [second for _, second in pairs]

    # The statistic overwrites its own source column, so the pairs are read
    # once up front and the p-value column is written first.
    ks_pairs = df['kolmogorov-smirnov'].tolist()
    df['kolmogorov-smirnov pvalue'] = [pvalue for _, pvalue in ks_pairs]
    df['kolmogorov-smirnov'] = [statistic for statistic, _ in ks_pairs]

    fits = df['linreg'].tolist()
    df['linear fit slope'] = [
        slope for slope, intercept, rvalue, pvalue, stderr in fits]
    df['linear fit intercept'] = [
        intercept for slope, intercept, rvalue, pvalue, stderr in fits]

    df['protein'] = df.index
    df['Neff'] = [int(x) for x in df.Neff.tolist()]

    data = []
    for key in keys_list:
        data.append(
            go.Box(
                y=df[key],
                name=key,
                text=df['protein'],
                showlegend=False,
                boxmean=False,
                # plotly spells this value in lower case
                boxpoints='outliers'
            )
        )

    plot = {
        "data": data,
        "layout": go.Layout(
            margin=dict(t=10),
            font=dict(size=18),
            yaxis1=dict(
                title="statistics value",
                exponentformat="e",
                showexponent='all',
                range=[0, 1]
            )
        )
    }

    plot_out = plot_dir + "/comparative_statistics_boxplot_for_" + method_1.replace(" ", "_") + "_" + method_2.replace(" ", "_") + "_l2norm_APC_scores.html"
    plotly_plot(plot, filename=plot_out, auto_open=False, show_link=False)
def plot_percentage_gaps_per_position(alignment, plot_file=None):
    """
    Plot the fraction of gaps and the normalised entropy per position.

    Counts are obtained from ``PseudoCounts`` using the weights returned by
    ``SequenceWeights(False, 0.8).weights_simple``. The entropy is divided
    by its maximum over all positions.

    :param alignment: alignment; ``len(alignment)`` is reported as N and
        ``len(alignment[0])`` as L
    :param plot_file: output file name; if None the plot dictionary is
        returned instead of being written
    :return: the plot dictionary if ``plot_file`` is None, otherwise None
    """
    N = float(len(alignment))
    L = len(alignment[0])

    weighting = SequenceWeights(False, 0.8)
    weights = weighting.weights_simple(alignment)

    # compute counts and frequencies
    pseudocounts = PseudoCounts(alignment, weights)
    pseudocounts.calculate_frequencies(
        'uniform_pseudocounts', 1, 1, remove_gaps=False
    )
    single_counts = pseudocounts.counts[0]

    # share of column 20 in the total count of each position
    gaps = single_counts[:, 20] / single_counts.sum(1)

    # normalized entropy
    entropy_per_position = scipy.stats.entropy(single_counts.transpose(), base=2)
    entropy_per_position /= np.max(entropy_per_position)

    positions = list(range(1, L + 1))
    data = [
        go.Scatter(
            x=positions,
            y=gaps,
            name="percentage of gaps",
            # plotly spells the mode flag in lower case
            mode="lines",
            line=dict(width=3)
        ),
        go.Scatter(
            x=positions,
            y=entropy_per_position,
            name="relative Entropy",
            mode="lines",
            line=dict(width=3)
        ),
    ]

    layout = {
        'title': "Percentage of gaps and Entropy in alignment <br> N=" + str(N) + ", L=" + str(L),
        'xaxis': {'title': "Alignment Position"},
        'yaxis': {'title': "Percentage of Gaps/Entropy"},
        'font': {'size': 18}
    }

    plot = {'data': data, 'layout': layout}
    if plot_file is None:
        return plot
    else:
        plotly_plot(plot, filename=plot_file, auto_open=False)
def plot_density(protein, bqij_data, plot_dir):
    """
    Write a density plot and an overlaid histogram of q_ijab values.

    One curve and one histogram are drawn per group; the values are the
    entries above the diagonal (i < j) of the array read from the file of
    each group.

    :param protein: protein name; prefix of both output file names
    :param bqij_data: maps a group name to a file readable by
        ``io.read_qij``; the extra entry 'L' is passed to the reader and
        used as the matrix size
    :param plot_dir: directory both HTML files are written to
    :return: None
    """
    group_labels = [key for key in sorted(bqij_data.keys()) if key != "L"]
    L = bqij_data['L']
    upper_triangle = np.triu_indices(n=L, k=1)

    hist_data = []
    data = []
    for group in group_labels:
        # the first return value of the reader is not needed here
        _, qij = io.read_qij(bqij_data[group], L)
        data_group = qij[upper_triangle].flatten()
        hist_data.append(data_group)

        data.append(
            go.Histogram(
                x=data_group,
                histnorm='probability',
                name=group,
                xbins=dict(
                    start=-0.1,
                    end=1,
                    size=0.005
                ),
                opacity=0.75
            )
        )

    # density plot
    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)
    fig['layout']['font'] = dict(size=18)
    fig['layout']['xaxis']['title'] = "q_ijab"

    plot_file = plot_dir + "/" + protein + "_distribution_qijab" + ".html"
    plotly_plot(fig, filename=plot_file, auto_open=False)

    # histogram
    plot_file = plot_dir + "/" + protein + "_histogram_qijab" + ".html"
    layout = go.Layout(
        barmode='overlay',
        xaxis=dict(
            title="q_ijab",
            exponentformat="e",
            # plotly spells this value in lower case
            showexponent='all'
        ),
        yaxis=dict(
            exponentformat="e",
            showexponent='all'
        ),
        font=dict(size=18)
    )
    fig = go.Figure(data=data, layout=layout)
    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_alignment_entropy(alignment_file, plot_dir=None):
    """
    Plot the entropy (base 2) of every alignment column.

    Frequencies are computed without pseudocounts. State 0 is removed
    before the entropy is computed (it is treated as the gap state here).

    :param alignment_file: alignment readable by ``io.read_alignment``; the
        file name up to the first "." is used as protein name
    :param plot_dir: directory for "alignment_entropy_<protein>.html"; if
        None the plot dictionary is returned instead of being written
    :return: the plot dictionary if ``plot_dir`` is None, otherwise None
    """
    # read alignment
    protein = os.path.basename(alignment_file).split(".")[0]
    alignment = io.read_alignment(alignment_file)

    N = float(len(alignment))
    L = len(alignment[0])

    alignment = alignment.transpose()

    # determine amino acid frequencies (without any pseudocounts)
    aa_freq_per_pos = np.zeros((21, L))
    for position in range(L):
        aa_counts = Counter(alignment[position])
        # .items() instead of the Python-2-only .iteritems()
        for aa, counts in aa_counts.items():
            aa_freq_per_pos[aa, position] = counts / N

    aa_freq_per_pos = aa_freq_per_pos[1:]  # remove gaps
    aa_freq_per_pos = aa_freq_per_pos.transpose()

    entropy_per_position = [
        entropy(aa_freq_per_pos[pos], base=2) for pos in range(L)
    ]

    data = [
        go.Scatter(x=list(range(L)),
                   y=entropy_per_position,
                   # the trace shows entropies; it used to carry the label
                   # "percentage of gaps"
                   name="entropy",
                   # plotly spells the mode flag in lower case
                   mode="lines")
    ]

    layout = {
        'title':
        "Entropy (base 2) in alignment of " + str(protein) + "<br> N=" +
        str(N) + ", L=" + str(L),
        'xaxis': {
            'title': "Alignment Position"
        },
        'yaxis': {
            'title': "Entropy "
        },
        'font': {
            'size': 18
        }
    }

    plot = {'data': data, 'layout': layout}

    if plot_dir is None:
        return plot
    else:
        plot_file = plot_dir + "/alignment_entropy_" + protein + ".html"
        plotly_plot(plot, filename=plot_file, auto_open=False)
def plot_gradient_ab_trace(gradient_df, ab_list, colors, plot_out=None):
    """
    Build a gradient trace plot with a drop-down menu over ``ab_list``.

    For every entry of ``ab_list`` one line per column of ``gradient_df`` is
    added. All lines start hidden; each menu button makes the lines of one
    entry visible.

    :param gradient_df: data frame with one column per parameter; the column
        names end in "_<component>" and each cell can be indexed with an
        entry of ``ab_list``
    :param ab_list: list of indices into the cells and into ``AB``
    :param colors: line colours, indexed by component number
    :param plot_out: output file name; if None the plot dictionary is
        returned instead of being written
    :return: the plot dictionary if ``plot_out`` is None, otherwise None
    """
    parameters = gradient_df.columns.tolist()
    nr_components = len(gradient_df.columns)
    nr_abs = len(ab_list)

    traces = []
    buttons = []
    for ab in ab_list:
        for parameter in parameters:
            component = int(parameter.split("_")[-1])
            traces.append(
                go.Scatter(
                    x=range(1, len(gradient_df) + 1),
                    y=gradient_df[parameter].apply(lambda x: x[ab]).tolist(),
                    mode='lines',
                    line=dict(
                        color=colors[component]
                    ),
                    name="component " + str(component) + " " + parameter + " (" + AB[ab] + ")",
                    showlegend=True,
                    visible=False
                )
            )

        # The traces are stored in blocks of nr_components per ab, so the
        # button shows exactly the block belonging to this ab.
        position = ab_list.index(ab)
        hidden_before = [False] * (nr_components) * position
        shown = [True] * (nr_components)
        hidden_after = [False] * (nr_components) * (nr_abs - position - 1)
        buttons.append(
            {
                'args': ['visible', hidden_before + shown + hidden_after],
                'label': AB[ab],
                'method': 'restyle'
            }
        )

    parameter_name = gradient_df.columns[0].split("_")[0]

    plot = {
        'data': traces,
        'layout': {
            # drop down menu
            'updatemenus': [{
                'xanchor': 'left',
                'yanchor': 'bottom',
                'x': 1.02,
                'y': 0.2,
                'buttons': buttons,
                'active': 0,
            }],
            'xaxis1': {'title': 'iteration'},
            'yaxis1': {'title': "gradient for " + parameter_name},
            'title': "gradient trace for " + parameter_name,
        }
    }

    if plot_out is None:
        return plot
    plotly_plot(plot, filename=plot_out, auto_open=False)
def plot_boxplot_correlation(stats_dict, keys_list, plot_file):
    """
    Write box plots of comparative statistics, one box per requested key.

    Tuple-valued statistics are split into separate columns ('Pearson r',
    'Pearson pvalue', ...) that can be selected through ``keys_list``.

    :param stats_dict: maps a protein to a dict with the entries 'pearson',
        'spearmanrho', 'kendalltau', 'kolmogorov-smirnov' (pairs) and
        'linreg' (5-tuple)
    :param keys_list: names of the columns to draw as boxes
    :param plot_file: output file name handed to plotly_plot
    :return: None
    """
    df = pd.DataFrame(stats_dict).transpose()

    pearson = df['pearson'].tolist()
    df['Pearson r'] = [r for r, _ in pearson]
    df['Pearson pvalue'] = [p for _, p in pearson]

    spearman = df['spearmanrho'].tolist()
    df['Spearman rho'] = [rho for rho, _ in spearman]
    df['Spearman pvalue'] = [p for _, p in spearman]

    kendall = df['kendalltau'].tolist()
    df['Kendalls tau'] = [tau for tau, _ in kendall]
    df['Kendalls pvalue'] = [p for _, p in kendall]

    # read the pairs before the source column is replaced by the statistic
    kolmogorov = df['kolmogorov-smirnov'].tolist()
    df['kolmogorov-smirnov pvalue'] = [p for _, p in kolmogorov]
    df['kolmogorov-smirnov'] = [stat for stat, _ in kolmogorov]

    fits = df['linreg'].tolist()
    df['linear fit slope'] = [
        slope for slope, intercept, rvalue, pvalue, stderr in fits
    ]
    df['linear fit intercept'] = [
        intercept for slope, intercept, rvalue, pvalue, stderr in fits
    ]

    df['protein'] = df.index

    boxes = [
        go.Box(y=df[key],
               name=key,
               text=df['protein'],
               showlegend=False,
               boxmean=False,
               boxpoints='outliers') for key in keys_list
    ]

    layout = go.Layout(margin=dict(t=10),
                       font=dict(size=18),
                       yaxis1=dict(title="statistics value",
                                   exponentformat="e",
                                   showexponent='all',
                                   range=[0, 1]),
                       width=800,
                       height=500)

    plotly_plot({"data": boxes, "layout": layout},
                filename=plot_file,
                auto_open=False,
                show_link=False)
def plot_scatter(apc, ec, text, plot_file):
    """
    Write a scatter plot of two score sets with a dotted diagonal and the
    Pearson correlation as annotation.

    :param apc: x values ("Average Product Correction")
    :param ec: y values ("Entropy Correction")
    :param text: hover text of the points
    :param plot_file: output file name handed to plotly_plot
    :return: None
    """
    scatter_data = go.Scatter(x=apc,
                              y=ec,
                              mode='markers',
                              marker=dict(color="black"),
                              text=text,
                              showlegend=False)

    # the diagonal runs from the origin to the largest value of either axis
    upper = np.max(list(apc) + list(ec))
    diagonal = go.Scatter(x=[0, upper],
                          y=[0, upper],
                          mode="lines",
                          line=dict(color="darkgrey", width=4, dash="dot"),
                          showlegend=False)

    pearson_r = pearsonr(apc, ec)

    data = [diagonal, scatter_data]

    plot = {
        "data": data,
        "layout": go.Layout(font=dict(size=24),
                            yaxis=dict(title="Entropy Correction",
                                       exponentformat="e",
                                       # plotly spells this value in lower case
                                       showexponent='all',
                                       scaleratio=1,
                                       scaleanchor='x'),
                            xaxis=dict(title="Average Product Correction",
                                       exponentformat="e",
                                       showexponent='all',
                                       scaleratio=1,
                                       scaleanchor='y'),
                            annotations=go.Annotations([
                                go.Annotation(x=0.05,
                                              y=0.95,
                                              showarrow=False,
                                              text='Pearson r = {0}'.format(
                                                  np.round(pearson_r[0],
                                                           decimals=3)),
                                              font=dict(color="black",
                                                        size=24),
                                              xref='paper',
                                              yref='paper')
                            ]),
                            margin=dict(t=10),
                            # numbers instead of the former numeric strings
                            width=550,
                            height=500)
    }

    plotly_plot(plot, filename=plot_file, auto_open=False, show_link=False)
def plot_convergence_trace_plotly(negll_trace_df, name, plot_title, plot_out=None):
    """
    Define a convergence plot in plotly dictionary style.

    One line is drawn per combination of column and pass. The plot is either
    written to a file or returned as dictionary.

    :param negll_trace_df: Pandas Dataframe with columns: pass, step, col1, col2
    :param name: List of column names for plotting, e.g [col1, col2]
    :param plot_title: title
    :param plot_out: Path to HTML output file; if None the plot dictionary
        is returned
    :return: the plot dictionary if ``plot_out`` is None, otherwise None
    """
    passes = set(negll_trace_df['pass'])

    data = []
    for trace in name:
        for iteration in passes:
            # select the rows of this pass once for both axes
            rows = negll_trace_df[negll_trace_df['pass'] == iteration]
            data.append(
                go.Scatter(
                    x=rows['step'].tolist(),
                    y=rows[trace].tolist(),
                    mode='lines',
                    name=trace + ' pass ' + str(iteration),
                    connectgaps=True,
                    showlegend=True,
                    line=dict(
                        width=4
                    )
                )
            )

    plot = {
        "data": data,
        "layout": go.Layout(
            title=plot_title,
            # plotly spells the showexponent value in lower case
            xaxis1=dict(title="step",
                        exponentformat="e",
                        showexponent='all'),
            yaxis1=dict(title="negative log likelihood",
                        exponentformat="e",
                        showexponent='all'),
            font=dict(size=18),
        )
    }

    if plot_out is not None:
        plotly_plot(plot, filename=plot_out, auto_open=False)
    else:
        return plot
def plot_projection_on_two_components_gapstructure(plot_dict, plot_out):
    """
    Plot the projection of the "Pfam" sequences onto two principal
    components, coloured by the fraction of gaps of each sequence.

    Entries of ``plot_dict['data']`` whose name is not "Pfam" are skipped.
    The hover text shows the sequence number, the gap fraction and the
    sequence itself, broken into lines of 50 characters.

    :param plot_dict: dict whose 'data' entry is a list of dicts with the
        entries 'name', 'x', 'y', 'N', 'L' and 'seq'; entries of a sequence
        equal to 20 are counted as gaps
    :param plot_out: output file name handed to plotly_plot
    :return: None
    """
    def _format_sequence(encoded, length):
        # spell out the sequence, starting a new line at every 50th residue
        letters = []
        for pos in range(length):
            letter = io.AMINO_ACIDS[encoded[pos]]
            if (pos + 1) % 50 == 0:
                letter = "<br>" + letter
            letters.append(letter)
        return "".join(letters)

    data = []
    for plot_data in plot_dict['data']:
        if plot_data['name'] != "Pfam":
            continue

        nr_sequences = plot_data['N']
        length = plot_data['L']

        percent_gaps = [
            len(np.where(seq == 20)[0]) / float(length)
            for seq in plot_data['seq']
        ]

        text = []
        for n in range(nr_sequences):
            text.append(
                "seq no " + str(n + 1)
                + "<br>fraction of gaps: "
                + str(np.round(percent_gaps[n], decimals=3))
                + "<br>" + _format_sequence(plot_data['seq'][n], length))

        data.append(
            go.Scatter(
                x=plot_data['x'],
                y=plot_data['y'],
                name=plot_data['name'],
                mode='markers',
                marker=dict(
                    color=percent_gaps,
                    colorbar=go.ColorBar(
                        title='Fraction of Gaps'
                    ),
                    colorscale='Bluered'
                ),
                text=text,
                showlegend=False
            )
        )

    plot = {
        "data": data,
        "layout": go.Layout(
            font=dict(size=18),
            title="",
            margin=dict(t=10),
            hovermode='closest',
            yaxis=dict(
                title="principal component 2",
                exponentformat="e",
                # plotly spells this value in lower case
                showexponent='all'
            ),
            xaxis=dict(
                title="principal component 1",
                exponentformat="e",
                showexponent='all'
            )
        )
    }

    plotly_plot(plot, filename=plot_out, auto_open=False)
def plot_log_observed_expected_at_abs(distances_ab, abs, seq_sep, distance_definition, plot_dir):
    """
    Plot the log ratio of observed to expected distance frequencies.

    The expected frequency per distance bin is taken from the 'all' entry
    of ``distances_ab[seq_sep]``, the observed one from each entry named in
    ``abs``. Bins are ``np.arange(2, 50, 0.5)``. Bins that are empty in
    either distribution yield infinite or undefined log ratios.

    :param distances_ab: nested mapping ``distances_ab[seq_sep][ab]`` giving
        a sequence of distances; must also contain the key 'all'
    :param abs: iterable of string keys into ``distances_ab[seq_sep]``
    :param seq_sep: sequence separation key; also part of the file name
    :param distance_definition: prefix of the output file name
    :param plot_dir: directory the HTML file is written to
    :return: None
    """
    bins = np.arange(2, 50, 0.5)

    def _bin_frequencies(distances):
        # Assign the distances to bins once and count per bin. This works
        # for lists and arrays alike; the 'all' entry used to be indexed
        # with a boolean mask directly, which fails for plain lists.
        bin_index = np.digitize(distances, bins)
        counts = np.array(
            [np.count_nonzero(bin_index == i) for i in range(len(bins))])
        return counts / float(np.sum(counts))

    # expected: frequency of pairs observed at this distance overall
    p_r = _bin_frequencies(distances_ab[seq_sep]['all'])

    data = []
    for ab in abs:
        distances = distances_ab[seq_sep][ab]
        p_r_ab = _bin_frequencies(distances)
        log_ratio = np.log(p_r_ab / p_r)

        data.append(
            go.Scatter(
                x=bins,
                y=log_ratio,
                mode='lines',
                name=ab + " (" + str(len(distances)) + ")"
            )
        )

    layout = go.Layout(
        title="",
        xaxis=dict(
            title="distance bins"
        ),
        yaxis=dict(
            title="log ratio observed vs expected"
        )
    )

    fig = go.Figure(data=data, layout=layout)
    plot_file = plot_dir + "/" + distance_definition + "_logratio_seqsep" + str(seq_sep) + ".html"
    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_projection_on_two_components(plot_dict, title, plot_out):
    """
    Plot the projection of several sequence sets onto two principal
    components, one marker trace per set.

    The hover text shows the sequence number and the sequence itself,
    broken into lines of 50 characters.

    :param plot_dict: dict whose 'data' entry is a list of dicts with the
        entries 'name', 'x', 'y', 'N', 'L' and 'seq'
    :param title: plot title; with an empty title the top margin is reduced
    :param plot_out: output file name handed to plotly_plot
    :return: None
    """
    def _format_sequence(encoded, length):
        # spell out the sequence, starting a new line at every 50th residue
        letters = []
        for pos in range(length):
            letter = io.AMINO_ACIDS[encoded[pos]]
            if (pos + 1) % 50 == 0:
                letter = "<br>" + letter
            letters.append(letter)
        return "".join(letters)

    data = []
    for plot_data in plot_dict['data']:
        text = [
            "seq no " + str(n + 1) + "<br>"
            + _format_sequence(plot_data['seq'][n], plot_data['L'])
            for n in range(plot_data['N'])
        ]

        data.append(
            go.Scatter(
                x=plot_data['x'],
                y=plot_data['y'],
                name=plot_data['name'],
                mode='markers',
                opacity=0.5,
                text=text,
                showlegend=True
            )
        )

    plot = {
        "data": data,
        "layout": go.Layout(
            font=dict(size=18),
            title=title,
            titlefont=dict(size=12),
            legend=dict(orientation="v"),
            hovermode='closest',
            yaxis=dict(
                title="principal component 2",
                exponentformat="e",
                # plotly spells this value in lower case
                showexponent='all'
            ),
            xaxis=dict(
                title="principal component 1",
                exponentformat="e",
                showexponent='all'
            )
        )
    }

    if title == "":
        plot['layout']['margin']['t'] = 10

    plotly_plot(plot, filename=plot_out, auto_open=False)
def plot_boxplot(statistics_dict, property, plot_file):
    """
    Write one box plot per topology for the chosen property.

    All points are drawn next to the boxes; hovering shows the protein and
    its target and sample Neff.

    :param statistics_dict: maps a topology name to a dict holding the
        values of ``property`` and the parallel lists 'protein',
        'target neff' and 'sample neff'
    :param property: key of the values to plot; "neff_difference" and
        "mutation_rate" additionally set a y-axis title
    :param plot_file: output file name handed to plotly_plot
    :return: None
    """
    data = []
    for topology in sorted(statistics_dict.keys()):
        statistics = statistics_dict[topology]
        values = statistics[property]
        proteins = statistics['protein']
        target_neff = statistics['target neff']
        sample_neff = statistics['sample neff']

        hover_text = []
        for i in range(len(values)):
            hover_text.append(
                "{0}<br>target neff:{1}<br>sample neff:{2}".format(
                    proteins[i], target_neff[i], sample_neff[i]))

        data.append(
            go.Box(y=values,
                   boxmean=True,
                   pointpos=1.8,
                   jitter=0.4,
                   boxpoints='all',
                   name=topology,
                   marker=dict(opacity=1),
                   text=hover_text,
                   hoverinfo='all',
                   orientation='v',
                   showlegend=False))

    layout = go.Layout(yaxis=dict(exponentformat='e', showexponent='all'),
                       margin=dict(t=10),
                       font=dict(size=18),
                       width=800,
                       height=500)
    plot = {"data": data, "layout": layout}

    # only these two properties come with an axis title
    axis_titles = {
        "neff_difference": "Pfam Neff - synthetic Neff",
        "mutation_rate": "mutation rate",
    }
    if property in axis_titles:
        plot['layout']['yaxis']['title'] = axis_titles[property]

    plotly_plot(plot, filename=plot_file, auto_open=False, show_link=False)
def plot_distance_distribution(distances_ab, ab, distance_definition, log, plot_dir):
    """
    Plot the distribution of distances with one curve per sequence
    separation.

    NaN distances are dropped. With ``log`` set the densities are estimated
    on log distances and the x axis is labelled with the back-transformed
    values.

    :param distances_ab: maps a sequence separation to a mapping
        ``values[ab]`` of distances
    :param ab: key of the distances to plot; either 'all' or a string whose
        first and third character name the two residues
    :param distance_definition: used in the axis titles and as prefix of
        the output file name
    :param log: if True, plot the distribution of log distances
    :param plot_dir: directory the HTML file is written to
    :return: None
    """
    # .items() instead of the Python-2-only .iteritems(); sorted once
    separations = sorted(distances_ab.items())

    group_labels = [
        "sequence separation " + str(seq_sep) for seq_sep, _ in separations
    ]

    # build the data once, in the requested scale
    hist_data = []
    for _, values in separations:
        distances = np.array(values[ab])
        not_nan = ~np.isnan(values[ab])
        if log:
            distances = np.log(distances)
        hist_data.append(distances[not_nan])

    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)

    for trace in fig['data']:
        trace['line']['width'] = 2
        # hover shows the distance on the original scale
        cb_distances = np.exp(trace['x']) if log else trace['x']
        trace['text'] = ['Cb distance: ' + str(x) for x in cb_distances]
        trace['hoverinfo'] = "text"

    if ab == 'all':
        residues = "residue pair"
    else:
        residues = ab[0] + " and " + ab[2]

    layout = fig['layout']
    layout['font'] = dict(size=16)
    layout['xaxis']['title'] = distance_definition + " distance between " + residues
    layout['xaxis']['showspikes'] = True
    layout['yaxis']['title'] = "Distribution of " + residues + " distances (" + distance_definition + ")"
    layout['yaxis']['showspikes'] = True
    layout['xaxis']['range'] = [3, 100]
    layout['xaxis']['tickangle'] = 0
    layout['margin']['t'] = 10

    plot_file = plot_dir + "/" + distance_definition + "_distribution_" + ab + "_data" + str(int(np.mean([len(h) for h in hist_data]))) + ".html"

    if log:
        # ticks sit at log positions but are labelled with plain distances
        tick_labels = [3, 4, 5, 6, 8, 10, 12, 15, 20, 30, 40, 50, 70, 80]
        layout['xaxis']['tickmode'] = "array"
        layout['xaxis']['ticktext'] = tick_labels
        layout['xaxis']['tickvals'] = np.log(tick_labels)
        layout['xaxis']['range'] = np.log([3, 100])
        plot_file = plot_file.replace(".html", "_log.html")

    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_learning_rate_schedules(dict_of_schedules, alpha_0, plot_out):
    """
    Plot learning rates over iterations for several schedules and decay
    rates, once with and once without title.

    Schedules differ in colour, decay rates in dash style. At most four
    decay rates per schedule are supported by the dash styles defined here.

    :param dict_of_schedules: maps a schedule name to a dict that maps a
        decay rate to its sequence of learning rates; the entry 'order'
        lists the decay rates in plotting order
    :param alpha_0: initial learning rate; shown in the title and used in
        the output file names
    :param plot_out: directory both HTML files are written to
    :return: None
    """
    linetype = ['dash', 'dot', 'longdash', 'dashdot']
    color = cl.scales['4']['qual']['Set1']

    data = []
    for id_schedule, name in enumerate(sorted(dict_of_schedules.keys())):
        print("{0} {1}".format(id_schedule, name))

        schedule = dict_of_schedules[name]
        # Read the order without popping it, so that the caller's dict is
        # left untouched and the function can be called again on it.
        ordered_keys = schedule['order']

        for id_rate, decay_rate in enumerate(ordered_keys):
            print("{0} {1}".format(id_rate, decay_rate))

            rates = schedule[decay_rate]
            data.append(
                go.Scatter(
                    # list() keeps the x values a plain list on Python 3
                    x=list(range(1, len(rates) + 1)),
                    y=rates,
                    name=name + " (" + str(decay_rate) + ")",
                    legendgroup=name,
                    line=dict(
                        width=4,
                        dash=linetype[id_rate],
                        color=color[id_schedule]
                    )
                )
            )

    layout = go.Layout(
        title="Comparison of learning rate schedules <br> alpha0={0}".format(alpha_0),
        font=dict(size=18),
        yaxis=dict(
            exponentformat="e",
            # plotly spells this value in lower case
            showexponent='all',
            title="learning rate"
        ),
        xaxis=dict(
            title="iteration"
        )
    )

    plot = go.Figure(data=data, layout=layout)

    # version with title
    plot_file = plot_out + "/learning_rate_schedules_alpha0" + str(alpha_0) + ".html"
    plotly_plot(plot, filename=plot_file, auto_open=False)

    # version without title and with a reduced top margin
    plot['layout']['title'] = ""
    plot['layout']['margin']['t'] = 10
    plot_file = plot_out + "/learning_rate_schedules_alpha0" + str(alpha_0) + "_notitle.html"
    plotly_plot(plot, filename=plot_file, auto_open=False)
def plot(self):
    """Return what ``plotly_plot`` produces for ``self.figure_or_data``.

    The call requests ``output_type='div'`` without the plotly link and
    without bundling plotly.js.
    """
    render_options = dict(
        show_link=False,
        output_type='div',
        include_plotlyjs=False,
    )
    return plotly_plot(self.figure_or_data, **render_options)
def plot_ccmgen_benchmark_figure(fig, title, plot_file, height=350, width=400):
    """Apply the benchmark layout to ``fig`` in place and write it to ``plot_file``.

    Sets font size, hover mode, title, margins, legend position, the y axis
    (title and a fixed [0, 1] range) and the figure size, then hands the
    figure to ``plotly_plot``.
    """
    layout = fig['layout']

    layout['font']['size'] = 18
    layout['hovermode'] = 'closest'
    layout['title'] = title

    margin = layout['margin']
    margin['b'] = 45
    margin['t'] = 50

    layout['legend'] = dict(orientation="v", x=0.65, y=1.0)
    layout['yaxis'] = dict(title="mean precision over proteins", range=[0, 1])

    layout['height'] = height
    layout['width'] = width

    plotly_plot(fig, filename=plot_file, auto_open=False, show_link=False)
def plot_pdb_uniprot_fct(data_dict, seq_dict, plot_dir=None):
    """Plot total entry counts over time for sequence databases and the PDB.

    Args:
        data_dict: mapping of name -> DataFrame with ``Date`` and ``Total``
            columns; must contain ``'PDB-Protein'`` whose ``Date`` column
            holds years (parsed with ``format="%Y"``). The filtered frames
            are stored back into this dict.
        seq_dict: mapping of name -> DataFrame with ``Date`` and ``Total``
            columns; each frame's ``Date`` column is converted to datetimes
            in place and its filtered copy is stored in ``data_dict``.
        plot_dir: if given, the plot is written to
            ``<plot_dir>/pdb_uniprot.html``; otherwise the plot dict is
            returned.

    Returns:
        The plot dict (``data`` and ``layout``) when ``plot_dir`` is None,
        otherwise None.
    """
    cutoff = '1996-01-01'
    data = []

    # .items() works on Python 2 and 3 (.iteritems() is Python 2 only)
    for name, df in seq_dict.items():
        df['Date'] = pd.to_datetime(df['Date'])
        # Keep everything that is not before the cutoff. A boolean mask is
        # used because the former drop() treated index labels as positions,
        # which is only correct for a default 0..n-1 index.
        data_dict[name] = df.loc[~(df['Date'] < cutoff)].copy()
        data.append(
            go.Scatter(x=data_dict[name].Date,
                       y=data_dict[name].Total,
                       showlegend=True,
                       name=name,
                       line=dict(width=4, dash='dot')))

    pdb = data_dict['PDB-Protein']
    pdb['Date'] = pd.to_datetime(pdb['Date'], format="%Y")
    pdb = pdb.loc[~(pdb['Date'] < cutoff)].copy()
    # The row labelled 0 is moved to today's date (presumably the most recent
    # year -- confirm against the input data). .loc replaces the chained
    # assignment, which may silently write to a temporary copy.
    if 0 in pdb.index:
        pdb.loc[0, 'Date'] = pd.to_datetime('today')
    data_dict['PDB-Protein'] = pdb

    data.append(
        go.Scatter(x=pdb.Date,
                   y=pdb.Total,
                   showlegend=True,
                   name='PDB-Protein',
                   line=dict(width=4)))

    plot = {
        "data": data,
        "layout": go.Layout(
            legend=dict(x=.05, y=1.0),
            title="",
            xaxis=dict(title="Year", range=['01-01-1999', '01-01-2017']),
            yaxis=dict(title="Total number of Entries", type="log"),
            font=dict(size=18))
    }

    if plot_dir is None:
        return plot
    plot_file = plot_dir + "/pdb_uniprot.html"
    plotly_plot(plot, filename=plot_file, auto_open=False)
def plot_percentage_gaps_per_position(single_freq, plot_file=None):
    """Plot gap fraction and normalized entropy for every alignment position.

    Args:
        single_freq: 2-D array with one row per position; column 20 is
            treated as the gap column.
        plot_file: if given, the plot is written there; otherwise the plot
            dict is returned.

    Returns:
        The plot dict (``data`` and ``layout``) when ``plot_file`` is None,
        otherwise None.
    """
    L = single_freq.shape[0]
    positions = list(range(1, L + 1))

    # fraction of gaps: gap column relative to the row total
    gaps = single_freq[:, 20] / single_freq.sum(1)

    # per-position entropy (base 2), rescaled so the largest value is 1
    entropy_per_position = scipy.stats.entropy(single_freq.transpose(), base=2)
    entropy_per_position /= np.max(entropy_per_position)

    data = [
        go.Scatter(
            x=positions,
            y=gaps,
            name="percentage of gaps",
            mode="lines",  # scatter mode flags are lower case; "Lines" is invalid
            line=dict(width=3)
        ),
        go.Scatter(
            x=positions,
            y=entropy_per_position,
            name="relative Entropy",
            mode="lines",
            line=dict(width=3)
        ),
    ]

    layout = {
        'title': "Percentage of gaps and Entropy in alignment",
        'xaxis': {'title': "Alignment Position"},
        'yaxis': {'title': "Percentage of Gaps/Entropy"},
        'font': {'size': 18}
    }

    plot = {'data': data, 'layout': layout}

    if plot_file is None:
        return plot
    plotly_plot(plot, filename=plot_file, auto_open=False)
def plot_log_observed_expected_at_abs(distances_ab, abs, seq_sep,
                                      distance_definition, plot_dir):
    """Plot the log ratio of pair-specific to overall distance frequencies.

    Distances are binned with ``np.digitize`` on edges 2, 2.5, ..., 49.5.
    Only digitize indices ``0 .. len(bins) - 1`` are counted, so index 0
    collects values below the first edge and values at or above the last
    edge are left out. The counts are normalized to frequencies; the
    frequencies of each entry of ``abs`` are divided by those of the
    ``'all'`` entry and the log of that ratio is plotted against the edges.

    Args:
        distances_ab: mapping ``seq_sep -> {key: distances}``; the ``'all'``
            entry is indexed with a boolean mask (so presumably a numpy
            array -- confirm against callers).
        abs: iterable of keys to draw, one line per key.
        seq_sep: sequence separation selecting the distance sets.
        distance_definition: label used in the output file name.
        plot_dir: directory the HTML file is written to.
    """
    bins = np.arange(2, 50, 0.5)
    n_bins = len(bins)
    distance_sets = distances_ab[seq_sep]

    # expected frequencies: all pairs at this sequence separation
    background = distance_sets['all']
    background_bin = np.digitize(background, bins)
    counts_all = [len(background[background_bin == idx]) for idx in range(n_bins)]
    p_r = np.array(counts_all) / float(np.sum(counts_all))

    traces = []
    for ab in abs:
        observed = np.array(distance_sets[ab])
        observed_bin = np.digitize(distance_sets[ab], bins)
        counts_ab = [len(observed[observed_bin == idx]) for idx in range(n_bins)]
        p_r_ab = np.array(counts_ab) / float(np.sum(counts_ab))

        label = ab + " (" + str(len(distance_sets[ab])) + ")"
        traces.append(
            go.Scatter(x=bins,
                       y=np.log(p_r_ab / p_r),
                       mode='lines',
                       name=label))

    layout = go.Layout(title="",
                       xaxis=dict(title="distance bins"),
                       yaxis=dict(title="log ratio observed vs expected"))
    fig = go.Figure(data=traces, layout=layout)

    plot_file = (plot_dir + "/" + distance_definition + "_logratio_seqsep"
                 + str(seq_sep) + ".html")
    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_scatter(statistics_dict, key, plot_out):
    """Scatter a per-protein statistic against the target Neff for every method.

    Args:
        statistics_dict: mapping ``method -> stats`` where ``stats`` provides
            ``key``, ``'protein'``, ``'target neff'`` and ``'sample neff'``
            as index-aligned sequences.
        key: statistic plotted on the y axis. ``"neff_difference"`` and
            ``"mutation_rate"`` additionally set a figure and y-axis title.
        plot_out: file the plot is written to.
    """
    data = []
    for method in sorted(statistics_dict.keys()):
        stats = statistics_dict[method]
        values = stats[key]
        proteins = stats['protein']
        target_neff = stats['target neff']
        sample_neff = stats['sample neff']

        # hover text: protein name plus both Neff values
        hover_text = [
            proteins[i] + "<br>target neff: " + str(target_neff[i])
            + "<br>sample neff: " + str(sample_neff[i])
            for i in range(len(values))
        ]
        data.append(
            go.Scatter(
                x=target_neff,
                y=values,
                name=method,
                mode="markers",
                text=hover_text,
            )
        )

    plot = {
        "data": data,
        "layout": go.Layout(
            yaxis=dict(
                exponentformat='e',
                showexponent='all'  # attribute values are lower case
            ),
            xaxis=dict(title="Target Neff"),
            font=dict(size=18)
        )
    }

    if key == "neff_difference":
        plot['layout']['title'] = "Difference in target and sampled Neff"
        plot['layout']['yaxis']['title'] = "target - sampled neff"
    elif key == "mutation_rate":
        plot['layout']['title'] = "Mutation rate used for Sampling"
        plot['layout']['yaxis']['title'] = "mutation rate"

    plotly_plot(plot, filename=plot_out, auto_open=False)
def plot_percentage_gaps_per_position(alignment, plot_file=None):
    """Plot gap percentage and entropy for every column of an alignment.

    Args:
        alignment: sequence of aligned sequences; ``len(alignment)`` is
            reported as N and ``len(alignment[0])`` as L in the title.
        plot_file: if given, the plot is written there; otherwise the plot
            dict is returned.

    Returns:
        The plot dict (``data`` and ``layout``) when ``plot_file`` is None,
        otherwise None.
    """
    N = float(len(alignment))
    L = len(alignment[0])
    positions = list(range(1, L + 1))

    gaps = ali_ut.compute_gaps_per_position(alignment)
    entropy_per_position = ali_ut.compute_entropy_per_position(alignment)

    data = [
        go.Scatter(x=positions,
                   y=gaps,
                   name="percentage of gaps",
                   mode="lines",  # scatter mode flags are lower case; "Lines" is invalid
                   line=dict(width=3)),
        go.Scatter(x=positions,
                   y=entropy_per_position,
                   name="relative Entropy",
                   mode="lines",
                   line=dict(width=3)),
    ]

    layout = {
        'title': "Percentage of gaps and Entropy in alignment <br> N=" + str(N) + ", L=" + str(L),
        'xaxis': {'title': "Alignment Position"},
        'yaxis': {'title': "Percentage of Gaps/Entropy"},
        'font': {'size': 18}
    }

    plot = {'data': data, 'layout': layout}

    if plot_file is None:
        return plot
    plotly_plot(plot, filename=plot_file, auto_open=False)
def plot_coupling_vs_distance_distribution(couplings_per_bin, plot_dir, ab, abs=False):
    """Draw grouped box plots of couplings per distance bin, one group per method.

    Args:
        couplings_per_bin: mapping ``method -> {distance bin: couplings}``.
        plot_dir: directory the HTML file is written to.
        ab: label of the coupling, used in the title and the file name.
        abs: if true, absolute coupling values are plotted and ``_abs`` is
            appended to the file name.

    Raises:
        IndexError: if ``couplings_per_bin`` is empty.
    """
    # a list, because dict views cannot be indexed on Python 3
    methods = list(couplings_per_bin.keys())

    data = []
    for method in methods:
        x = []
        y = []
        for distance_bin in sorted(couplings_per_bin[method].keys()):
            couplings = couplings_per_bin[method][distance_bin]
            x.extend([distance_bin] * len(couplings))
            y.extend(np.abs(couplings) if abs else couplings)
        data.append(go.Box(y=y, x=x, name=method))

    # the first method defines the tick positions and the size estimate
    reference_bins = sorted(couplings_per_bin[methods[0]].keys())
    # floor division keeps the title an integer on Python 3 as well
    approx_per_bin = len(data[0]['x']) // len(reference_bins)

    layout = go.Layout(
        title='Distribution of couplings for wij(' + ab + ") <br> ~" + str(approx_per_bin) + " couplings per bin ",
        yaxis=dict(zeroline=False),
        xaxis=dict(
            title="Cbeta distance bins",
            tickvals=reference_bins
        ),
        font=dict(size=18),
        boxmode='group'
    )

    fig = go.Figure(data=data, layout=layout)

    plot_name = plot_dir + "/coupling_distribution_" + ab
    if abs:
        plot_name = plot_name + "_abs"
    plotly_plot(fig, filename=plot_name + ".html", auto_open=False)
def plot_boxplot_correlation_alignment_statistics_pll_vs_pcd(data_dict, plot_dir):
    """Write grouped box plots of Pearson correlations for pLL and PCD.

    Args:
        data_dict: must contain the entries ``'pseudo-likelihood'`` and
            ``'contrastive divergence'``, each with ``'x'`` and ``'y'``.
            Note that the ``'x'`` entry is drawn on the plot's y axis and
            the ``'y'`` entry on its x axis.
        plot_dir: directory the HTML file is written to.
    """
    # (key in data_dict, legend name)
    series = (
        ('pseudo-likelihood', "pseudo-likelihood"),
        ('contrastive divergence', "persistent contrastive divergence"),
    )

    boxes = []
    for source_key, legend_name in series:
        values = data_dict[source_key]
        boxes.append(
            go.Box(
                y=values['x'],
                x=values['y'],
                boxpoints='outliers',
                name=legend_name,
                hoverinfo='all',
                orientation="v",
                showlegend=True
            )
        )

    layout = go.Layout(
        title="",
        margin=dict(t=10),
        legend=dict(orientation="h", xanchor="center", x=0.5, y=1.2),
        yaxis=dict(title="Pearson's r", range=[0, 1]),
        font=dict(size=18),
        boxmode='group'
    )

    out_file = plot_dir + "/boxplot_pearson_correlation_coeff_empirical_vs_model_statistics.html"
    plotly_plot(go.Figure(data=boxes, layout=layout),
                filename=out_file, auto_open=False, show_link=False)
def plot_meta_property_vs_method(method_numit, axis_title, sorted_methods, plot_dir):
    """Write one box plot per method on a logarithmic y axis.

    Args:
        method_numit: mapping ``method -> values``.
        axis_title: y-axis title; also used (spaces replaced by
            underscores) in the output file name.
        sorted_methods: order in which the methods are drawn. Entries
            missing from ``method_numit`` are skipped; methods not listed
            are appended in the mapping's own order. May be None or empty.
        plot_dir: directory the HTML file is written to.
    """
    plot_name = plot_dir + "/distribution_" + axis_title.replace(" ", "_") + "_against_methods.html"

    # honour the requested ordering (the parameter used to be ignored)
    ordered_methods = [m for m in (sorted_methods or []) if m in method_numit]
    ordered_methods += [m for m in method_numit if m not in ordered_methods]

    data = []
    for method in ordered_methods:
        data.append(
            go.Box(
                y=method_numit[method],
                boxmean=True,
                boxpoints='outliers',  # attribute values are lower case
                name=method,
                marker=dict(opacity=1),
                hoverinfo='all',
                orientation='v',
                showlegend=False
            )
        )

    plot = {
        "data": data,
        "layout": go.Layout(
            yaxis=dict(
                title=axis_title,
                type='log',
                exponentformat='none',
                showexponent='none',
                tickmode="array",
                tickvals=[1, 10, 100, 500],
                ticktext=[1, 10, 100, 500]
            ),
            font=dict(size=18)
        )
    }

    plotly_plot(plot, filename=plot_name, auto_open=False, show_link=False)
def plot_density(protein, bqij_data, plot_dir):
    """Write a density plot and an overlaid histogram of q_ij values.

    Args:
        protein: name used as prefix of both output files.
        bqij_data: mapping with the entry ``'L'`` and, for every other key
            (one group per key), a file that ``io.read_qij`` can read.
        plot_dir: directory the two HTML files are written to.
    """
    group_labels = [key for key in sorted(bqij_data.keys()) if key != "L"]
    L = bqij_data['L']
    upper_triangle = np.triu_indices(n=L, k=1)

    hist_data = []
    histograms = []
    for group in group_labels:
        # only the second return value of read_qij is used
        _, qij = io.read_qij(bqij_data[group], L)
        # values above the diagonal of the first two axes
        data_group = qij[upper_triangle].flatten()
        hist_data.append(data_group)
        histograms.append(
            go.Histogram(x=data_group,
                         histnorm='probability',
                         name=group,
                         xbins=dict(start=-0.1, end=1, size=0.005),
                         opacity=0.75))

    # density curves only: no histogram bars, no rug
    fig = ff.create_distplot(hist_data,
                             group_labels,
                             show_hist=False,
                             show_rug=False)
    fig['layout']['font'] = dict(size=18)
    fig['layout']['xaxis']['title'] = "q_ijab"
    plot_file = plot_dir + "/" + protein + "_distribution_qijab" + ".html"
    plotly_plot(fig, filename=plot_file, auto_open=False)

    # overlaid histograms of the same data
    plot_file = plot_dir + "/" + protein + "_histogram_qijab" + ".html"
    layout = go.Layout(barmode='overlay',
                       xaxis=dict(title="q_ijab",
                                  exponentformat="e",
                                  showexponent='all'),  # values are lower case
                       yaxis=dict(exponentformat="e", showexponent='all'),
                       font=dict(size=18))
    fig = go.Figure(data=histograms, layout=layout)
    plotly_plot(fig, filename=plot_file, auto_open=False)
def combine_plots(plot_pdb, plot_pdb_uniprot, plot_dir):
    """Place two plots side by side and write ``pdb_uniprot_stats.html``.

    The traces of ``plot_pdb`` go into the left panel and those of
    ``plot_pdb_uniprot`` into the right one; each panel takes over the x and
    y axis settings of the plot it came from.
    """
    panels = (plot_pdb, plot_pdb_uniprot)
    fig = tools.make_subplots(rows=1, cols=2, print_grid=False)

    # traces first ...
    for column, panel in enumerate(panels, 1):
        for trace in panel['data']:
            fig.append_trace(trace, 1, column)

    # ... then the axis settings of each source plot
    for column, panel in enumerate(panels, 1):
        for axis in ('xaxis', 'yaxis'):
            fig['layout'][axis + str(column)].update(panel['layout'][axis])

    fig['layout']['font']['size'] = 18
    fig['layout']['margin']['t'] = 10

    plotname = plot_dir + "/pdb_uniprot_stats.html"
    plotly_plot(fig, filename=plotname, auto_open=False)
def plot_learning_rate_schedules(dict_of_schedules, alpha_0, plot_out):
    """Plot learning-rate schedules and write them with and without a title.

    Args:
        dict_of_schedules: mapping ``schedule name -> {'order': [decay rates],
            decay_rate: [learning rate per iteration], ...}``. The ``'order'``
            entry lists the decay rates in plotting order. The mapping is
            only read, so the function can be called repeatedly on it.
        alpha_0: initial learning rate, shown in the title and file names.
        plot_out: directory the two HTML files are written to.
    """
    linetype = ['dash', 'dot', 'longdash', 'dashdot']
    color = cl.scales['4']['qual']['Set1']

    data = []
    for id_schedule, name in enumerate(sorted(dict_of_schedules.keys())):
        print("{0} {1}".format(id_schedule, name))
        schedule = dict_of_schedules[name]
        # read instead of pop(): popping removed 'order' from the caller's
        # dict, so a second call on the same input raised KeyError
        ordered_keys = schedule['order']
        # cycle colours rather than fail beyond the four available ones
        schedule_color = color[id_schedule % len(color)]
        for id_rate, decay_rate in enumerate(ordered_keys):
            print("{0} {1}".format(id_rate, decay_rate))
            rates = schedule[decay_rate]
            data.append(
                go.Scatter(x=list(range(1, len(rates) + 1)),
                           y=rates,
                           name=name + " (" + str(decay_rate) + ")",
                           legendgroup=name,
                           line=dict(width=4,
                                     dash=linetype[id_rate % len(linetype)],
                                     color=schedule_color)))

    layout = go.Layout(
        title="Comparison of learning rate schedules <br> alpha0={0}".format(
            alpha_0),
        font=dict(size=18),
        yaxis=dict(exponentformat="e",
                   showexponent='all',  # attribute values are lower case
                   title="learning rate"),
        xaxis=dict(title="iteration"))
    plot = go.Figure(data=data, layout=layout)

    file_stem = plot_out + "/learning_rate_schedules_alpha0" + str(alpha_0)
    plotly_plot(plot, filename=file_stem + ".html", auto_open=False)

    # second version without a title and with a small top margin
    plot['layout']['title'] = ""
    plot['layout']['margin']['t'] = 10
    plotly_plot(plot, filename=file_stem + "_notitle.html", auto_open=False)
def plot_meta_property_vs_method(method_numit, axis_title, sorted_methods,
                                 plot_dir):
    """Write one box plot per method on a logarithmic y axis.

    Args:
        method_numit: mapping ``method -> values``.
        axis_title: y-axis title; also used (spaces replaced by
            underscores) in the output file name.
        sorted_methods: order in which the methods are drawn. Entries
            missing from ``method_numit`` are skipped; methods not listed
            are appended in the mapping's own order. May be None or empty.
        plot_dir: directory the HTML file is written to.
    """
    plot_name = plot_dir + "/distribution_" + axis_title.replace(
        " ", "_") + "_against_methods.html"

    # honour the requested ordering (the parameter used to be ignored)
    ordered_methods = [m for m in (sorted_methods or []) if m in method_numit]
    ordered_methods += [m for m in method_numit if m not in ordered_methods]

    data = [
        go.Box(y=method_numit[method],
               boxmean=True,
               boxpoints='outliers',  # attribute values are lower case
               name=method,
               marker=dict(opacity=1),
               hoverinfo='all',
               orientation='v',
               showlegend=False)
        for method in ordered_methods
    ]

    plot = {
        "data": data,
        "layout": go.Layout(
            yaxis=dict(
                title=axis_title,
                type='log',
                exponentformat='none',
                showexponent='none',
                tickmode="array",
                tickvals=[1, 10, 100, 500],
                ticktext=[1, 10, 100, 500]),
            font=dict(size=18))
    }

    plotly_plot(plot, filename=plot_name, auto_open=False, show_link=False)
def combine_plots(plot_pdb, plot_pdb_uniprot, plot_dir):
    """Combine two plots into a one-row, two-column panel figure.

    Traces and axis settings of ``plot_pdb`` fill column 1, those of
    ``plot_pdb_uniprot`` column 2. The result is written to
    ``<plot_dir>/pdb_uniprot_stats.html``.
    """
    fig = tools.make_subplots(rows=1, cols=2, print_grid=False)
    layout = fig['layout']

    left_traces = plot_pdb['data']
    right_traces = plot_pdb_uniprot['data']
    for trace in left_traces:
        fig.append_trace(trace, 1, 1)
    for trace in right_traces:
        fig.append_trace(trace, 1, 2)

    # copy the axis settings of each source plot onto its panel
    axis_sources = [
        ('xaxis1', plot_pdb, 'xaxis'),
        ('yaxis1', plot_pdb, 'yaxis'),
        ('xaxis2', plot_pdb_uniprot, 'xaxis'),
        ('yaxis2', plot_pdb_uniprot, 'yaxis'),
    ]
    for target, source_plot, source_axis in axis_sources:
        layout[target].update(source_plot['layout'][source_axis])

    layout['font']['size'] = 18
    layout['margin']['t'] = 10

    plotly_plot(fig, filename=plot_dir + "/pdb_uniprot_stats.html", auto_open=False)
def plot_amino_acid_distribution_per_position(aa_counts_single, title, plot_file=None, freq=True):
    """Draw a stacked bar chart of the amino-acid distribution per position.

    Args:
        aa_counts_single: 2-D array with one row per alignment position; the
            first 20 columns are plotted, labelled with ``io.AMINO_ACIDS``.
            The array is not modified.
        title: figure title.
        plot_file: if given, the plot is written there; otherwise the plot
            dict is returned.
        freq: if true, all values are divided by the sum of the first row
            before plotting.

    Returns:
        The plot dict (``data`` and ``layout``) when ``plot_file`` is None,
        otherwise None.
    """
    Neff = np.sum(aa_counts_single[0, :])
    L = aa_counts_single.shape[0]
    positions = list(range(1, L + 1))

    if freq:
        # Out-of-place true division: the former `/=` overwrote the caller's
        # array and raised a casting error for integer count matrices.
        aa_counts_single = aa_counts_single / float(Neff)

    # one bar series per amino acid, stacked per position
    data = [
        go.Bar(
            x=positions,
            y=aa_counts_single[:, aa].tolist(),
            showlegend=True,
            name=io.AMINO_ACIDS[aa]
        )
        for aa in range(20)
    ]

    layout = go.Layout(
        barmode='stack',
        title=title,
        xaxis=dict(title="Alignment Position"),
        yaxis=dict(
            title="Amino Acid Distribution",
            exponentformat='e',
            showexponent='all'  # attribute values are lower case
        ),
        font=dict(size=18)
    )

    plot = {'data': data, 'layout': layout}

    if plot_file is None:
        return plot
    plotly_plot(plot, filename=plot_file, auto_open=False)
def plot_1d_coupling_profile(couplings_per_pair, lower_cb_distance, upper_cb_distance, plot_file):
    """Write density curves of coupling values, one curve per entry.

    Args:
        couplings_per_pair: mapping ``label -> coupling values``; each curve
            is named ``label(<number of values>)``.
        lower_cb_distance: lower distance bound, shown in the x-axis title.
        upper_cb_distance: upper distance bound, shown in the x-axis title.
        plot_file: file the plot is written to.
    """
    # Build labels and data in one pass so they are guaranteed to line up,
    # and as lists: the dict views returned by keys()/values() on Python 3
    # cannot be indexed.
    group_labels = []
    hist_data = []
    for key, couplings in couplings_per_pair.items():
        group_labels.append(key + "(" + str(len(couplings)) + ")")
        hist_data.append(couplings)

    # density curves only: no histogram bars, no rug
    fig = ff.create_distplot(hist_data, group_labels, show_hist=False, show_rug=False)

    for trace in fig['data']:
        trace['line']['width'] = 2

    fig['layout']['font'] = dict(size=16)
    fig['layout']['xaxis']['title'] = "couplings w_ijab for residue pairs ij at {0}Å < ΔCβ < {1}Å".format(lower_cb_distance, upper_cb_distance)
    fig['layout']['xaxis']['range'] = [-1, 1]
    fig['layout']['yaxis']['title'] = "Distribution of couplings "
    fig['layout']['margin']['t'] = 10

    plotly_plot(fig, filename=plot_file, auto_open=False)
def plot_boxplot_all_stats(stats_df, plot_out=None):
    """Draw one box plot panel for every column of ``stats_df`` except "protein".

    Args:
        stats_df: table with a ``'protein'`` column (used as hover text)
            and one column per statistic.
        plot_out: if given, the figure is written there; otherwise it is
            returned.

    Returns:
        The figure when ``plot_out`` is None, otherwise None.
    """
    proteins = stats_df['protein']

    stats_names = stats_df.keys().tolist()
    stats_names.remove("protein")
    n_panels = len(stats_names)

    # one subplot column per statistic
    fig = tools.make_subplots(rows=1, cols=n_panels)
    for column, key in enumerate(stats_names, 1):
        box = go.Box(
            y=stats_df[key],
            name=key,
            hoverinfo='all',
            orientation="v",
            showlegend=False,
            boxpoints="all",
            jitter=0.5,
            pointpos=2,
            text=proteins
        )
        fig.append_trace(box, 1, column)

    fig['layout'].update(
        font=dict(size=18),
        hovermode='closest',
        title="Dataset Statistics",
        width=300 * n_panels,
        height=500
    )

    if plot_out is None:
        return fig
    plotly_plot(fig, filename=plot_out, auto_open=False, link_text='')
def with_jax(fig, filename):
    """Write ``fig`` to ``filename`` as HTML that also loads MathJax.

    The figure is rendered through ``plotly_plot`` with
    ``output_type='div'`` and placed in the body of a page whose head pulls
    MathJax 2.7.1 from cdnjs.
    """
    rendered = plotly_plot(fig, output_type='div')

    # NOTE(review): whitespace inside the template is reproduced as it
    # appears in the reviewed source (single spaces) -- confirm that no
    # line breaks were intended.
    page = (
        ' <head>'
        ' <script type="text/javascript" async'
        ' src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-MML-AM_SVG">'
        ' </script>'
        ' </head>'
        ' <body>'
        ' {plot_div:s}'
        ' </body>'
    ).format(plot_div=rendered)

    with open(filename, 'w') as out:
        out.write(page)
def _upper_triangle_covariances(pairwise_freq, single_freq, row_idx, col_idx):
    """Return pairwise[i,j,a,b] - single[i,a] * single[j,b] as a flat list.

    Covers the position pairs (i, j) given by ``row_idx``/``col_idx`` and the
    first 20 states a, b; ordered by pair, then a, then b.
    """
    expected = (single_freq[row_idx, :20, np.newaxis]
                * single_freq[col_idx, np.newaxis, :20])
    observed = pairwise_freq[row_idx, col_idx, :20, :20]
    return (observed - expected).flatten().tolist()


def _joint_extent(first, second):
    """Return [min, max] taken over both value sequences together."""
    return [np.min([np.min(first), np.min(second)]),
            np.max([np.max(first), np.max(second)])]


def plot_empirical_vs_model_statistics(
        single_freq_observed, single_freq_sampled,
        pairwise_freq_observed, pairwise_freq_sampled,
        title, plot_out=None, log=False, width=1500):
    """Scatter observed against sampled alignment statistics in three panels.

    The panels show single-site frequencies, pairwise frequencies (position
    pairs i < j) and covariances. Each panel gets a diagonal line and an
    annotation with the Pearson correlation coefficient.

    Args:
        single_freq_observed: array indexed ``[position, state]``.
        single_freq_sampled: same layout as ``single_freq_observed``.
        pairwise_freq_observed: array indexed
            ``[position, position, state, state]``.
        pairwise_freq_sampled: same layout as ``pairwise_freq_observed``.
        title: figure title; an empty string produces a more compact figure.
        plot_out: if given, the figure is written there; otherwise it is
            returned.
        log: if true, single and pairwise frequencies are log-transformed
            (covariances are not) and the first two panels get hand-placed
            ticks.
        width: figure width in pixels; the height is derived from it.

    Returns:
        The figure when ``plot_out`` is None, otherwise None.
    """
    L = single_freq_observed.shape[0]
    row_idx, col_idx = np.triu_indices(L, k=1)

    ## compute data
    x_single = single_freq_observed.flatten()
    y_single = single_freq_sampled.flatten()
    pair_freq_observed = pairwise_freq_observed[row_idx, col_idx, :, :].flatten()
    pair_freq_sampled = pairwise_freq_sampled[row_idx, col_idx, :, :].flatten()
    if log:
        x_single = np.log(x_single)
        y_single = np.log(y_single)
        pair_freq_observed = np.log(pair_freq_observed)
        pair_freq_sampled = np.log(pair_freq_sampled)
    x_single = x_single.tolist()
    y_single = y_single.tolist()
    pair_freq_observed = pair_freq_observed.tolist()
    pair_freq_sampled = pair_freq_sampled.tolist()

    # covariances are never log-transformed
    cov_observed = _upper_triangle_covariances(
        pairwise_freq_observed, single_freq_observed, row_idx, col_idx)
    cov_sampled = _upper_triangle_covariances(
        pairwise_freq_sampled, single_freq_sampled, row_idx, col_idx)

    ## first trace: single amino acid frequencies
    trace_single_frequencies = go.Scattergl(
        x=x_single,
        y=y_single,
        mode='markers',
        name='single frequencies',
        text=["position: {0}<br>amino acid: {1}".format(i + 1, io.AMINO_ACIDS[a])
              for i in range(L) for a in range(20)],
        marker=dict(color='black'),
        opacity=0.1,
        showlegend=False
    )
    pearson_corr_single = np.corrcoef(x_single, y_single)[0, 1]

    ## second trace: pairwise amino acid frequencies
    pair_freq_annotation = [
        "position: {0}-{1}<br>amino acid: {2}-{3}".format(
            i + 1, j + 1, io.AMINO_ACIDS[a], io.AMINO_ACIDS[b])
        for i in range(L - 1) for j in range(i + 1, L)
        for a in range(20) for b in range(20)]
    trace_pairwise_frequencies = go.Scattergl(
        x=pair_freq_observed,
        y=pair_freq_sampled,
        mode='markers',
        name='pairwise frequencies',
        text=pair_freq_annotation,
        marker=dict(color='black'),
        opacity=0.1,
        showlegend=False
    )
    pearson_corr_pair = np.corrcoef(pair_freq_observed, pair_freq_sampled)[0, 1]

    ## third trace: covariances
    trace_cov = go.Scattergl(
        x=cov_observed,
        y=cov_sampled,
        mode='markers',
        name='covariances',
        text=pair_freq_annotation,
        marker=dict(color='black'),
        opacity=0.1,
        showlegend=False
    )
    pearson_corr_cov = np.corrcoef(cov_observed, cov_sampled)[0, 1]

    # Diagonals span the joint range of both axes. The extent is computed
    # from the separate minima/maxima: the former `a + b` concatenated lists
    # but added numpy arrays element-wise, which gave a wrong extent for the
    # pairwise panel in log mode.
    diagonals = []
    for x_values, y_values in ((x_single, y_single),
                               (pair_freq_observed, pair_freq_sampled),
                               (cov_observed, cov_sampled)):
        extent = _joint_extent(x_values, y_values)
        diagonals.append(
            go.Scattergl(
                x=extent,
                y=extent,
                mode="lines",
                showlegend=False,
                marker=dict(color='rgb(153, 204, 255)')
            )
        )

    ## define subplots
    fig = tools.make_subplots(
        rows=1,
        cols=3,
        subplot_titles=["single site amino acid frequencies",
                        "pairwise amino acid frequencies",
                        "covariances"],
        horizontal_spacing=0.05
    )

    ## add traces as subplots
    panel_traces = (trace_single_frequencies, trace_pairwise_frequencies, trace_cov)
    for column, (scatter, diagonal) in enumerate(zip(panel_traces, diagonals), 1):
        fig.append_trace(scatter, 1, column)
        fig.append_trace(diagonal, 1, column)

    # increase size of subplot titles
    annotations = list(fig['layout']['annotations'])
    for subplot_title in annotations[:3]:
        subplot_title['font']['size'] = 20

    # add text to plot: Pearson correlation coefficient
    for x_pos, corr in ((0.13, pearson_corr_single),
                        (0.48, pearson_corr_pair),
                        (0.85, pearson_corr_cov)):
        annotations.append(
            dict(
                x=x_pos,
                y=0.04,
                xanchor="left",
                xref='paper',
                yref='paper',
                text='Pearson r = ' + str(np.round(corr, decimals=3)),
                bgcolor="white",
                showarrow=False
            )
        )
    # assign the whole list: in-place extend() is not available when the
    # layout hands out its annotations as a tuple
    fig['layout']['annotations'] = annotations

    # define layout
    fig['layout'].update(
        font=dict(size=20),
        hovermode='closest',
        width=width
    )

    if title == "":
        fig['layout']['margin']['t'] = 40
        fig['layout']['height'] = width / 3
    else:
        fig['layout']['margin']['t'] = 120
        fig['layout']['title'] = title
        fig['layout']['titlefont']['size'] = 12
        fig['layout']['height'] = width / 3 + 100

    # specify axis layout details: every panel is square (x and y anchored
    # to each other with ratio 1)
    for idx in ('1', '2', '3'):
        fig['layout']['yaxis' + idx].update(
            exponentformat="e",
            showexponent='all',  # attribute values are lower case
            scaleanchor="x" + idx,
            scaleratio=1
        )
        fig['layout']['xaxis' + idx].update(
            exponentformat="e",
            showexponent='all',
            scaleanchor="y" + idx,
            scaleratio=1
        )
    fig['layout']['yaxis1']['title'] = "statistics from MCMC sample"
    fig['layout']['xaxis1']['showspikes'] = True
    fig['layout']['xaxis2']['title'] = "statistics from natural sequences"

    frequency_axes = ('xaxis1', 'yaxis1', 'xaxis2', 'yaxis2')
    if log:
        tick_values = [1e-10, 1e-8, 1e-6, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
        tick_labels = ["{:.0e}".format(v) for v in tick_values]
        for axis in frequency_axes:
            fig['layout'][axis]['zeroline'] = False
            fig['layout'][axis]['range'] = np.log([5e-5, 2])
            fig['layout'][axis]['ticktext'] = tick_labels
            fig['layout'][axis]['tickvals'] = np.log(tick_values)
    else:
        for axis in frequency_axes:
            fig['layout'][axis]['range'] = [0, 1]

    if plot_out is None:
        return fig
    plotly_plot(fig, filename=plot_out, auto_open=False, link_text='')
def plot_ccmgen_noise_quant_figure(subplots, plot_dir, height=500, width=500):
    """Plot entropy and phylogenetic noise as differences of precision curves.

    Three lines are drawn:
      * entropy noise (binary topology): ec - noapc precision
      * entropy noise (star topology):   ec - noapc precision
      * phylogenetic noise: star ec - binary ec precision

    Args:
        subplots: mapping with the entries ``'star topology'`` and
            ``'binary topology'``; each has a ``'data'`` list of traces with
            ``'name'``, ``'x'`` and ``'y'``. The curves are picked by the
            substrings ``'noapc'`` and ``'ec'`` in the trace name; if several
            traces match, the last one wins.
        plot_dir: directory the HTML file is written to.
        height: figure height in pixels.
        width: figure width in pixels.

    Returns:
        Path of the written HTML file.

    Raises:
        ValueError: if a topology lacks a 'noapc' or an 'ec' trace.
    """
    # local import so the block does not depend on a module-level alias
    import numpy as np

    def _precision_curves(topology):
        """Return (noapc precision, ec precision, x values) of one topology."""
        noapc = ec = x_values = None
        for trace in subplots[topology]['data']:
            if 'noapc' in trace['name']:
                noapc = np.asarray(trace['y'], dtype=float)
            if 'ec' in trace['name']:
                ec = np.asarray(trace['y'], dtype=float)
                # NOTE(review): x is taken from the 'ec' trace; all traces
                # presumably share the same x values -- confirm.
                x_values = trace['x']
        if noapc is None or ec is None:
            raise ValueError(
                "'{0}' needs one trace named '*noapc*' and one named '*ec*'".format(topology))
        return noapc, ec, x_values

    # arrays make the subtraction work when the traces hold lists or tuples
    precision_noapc_star, precision_ec_star, x = _precision_curves('star topology')
    precision_noapc_binary, precision_ec_binary, _ = _precision_curves('binary topology')

    entropy_noise_star_trace = go.Scatter(
        x=x,
        y=precision_ec_star - precision_noapc_star,
        name="entropy noise star",
        line=dict(width=4)
    )
    entropy_noise_binary_trace = go.Scatter(
        x=x,
        y=precision_ec_binary - precision_noapc_binary,
        name="entropy noise binary",
        line=dict(width=4)
    )
    phylogenetic_noise_trace = go.Scatter(
        x=x,
        y=precision_ec_star - precision_ec_binary,
        name="phylogenetic noise",
        line=dict(width=4)
    )

    data = [
        entropy_noise_binary_trace,
        entropy_noise_star_trace,
        phylogenetic_noise_trace
    ]

    fig = go.Figure(
        data=data,
        layout=go.Layout(
            title="quantification of noise",
            font=dict(size=18),
            margin=dict(b=45, t=50),
            xaxis=dict(
                title="#predicted contacts / protein length",
                showspikes=True
            ),
            yaxis=dict(
                title="fraction of noise",
                range=[0, 0.8],
                showspikes=True
            ),
            legend=dict(
                orientation="v",
                x=0.15,
                y=1.0
            ),
            width=width,
            height=height
        )
    )

    plot_file = plot_dir + "/" + "ccmgen_noise_quant_figure.html"
    plotly_plot(fig, filename=plot_file, auto_open=False, show_link=False)

    return plot_file
def plot_pll_vs_pcd_benchmark_figure(subplots, plot_dir, height=500, width=500):
    """Overlay the PCD and pLL precision curves in one figure.

    For each method a black copy of its first trace is added as legend entry
    in the group 'method' ('PCD' solid, 'pLL' dotted); the actual curves
    follow in the legend group 'correction', with the pLL curves dotted and
    hidden from the legend.

    Args:
        subplots: mapping with the entries
            ``'persistent contrastive divergence'`` and
            ``'pseudo-likelihood maximization'``, each holding a ``'data'``
            list of traces. The traces are modified in place (name, legend
            group; for pLL also legend visibility and dash style).
        plot_dir: directory the HTML file is written to.
        height: figure height in pixels.
        width: figure width in pixels.

    Returns:
        Path of the written HTML file.
    """
    data = []

    # add PCD traces
    pcd_traces = subplots['persistent contrastive divergence']['data']
    # Deep copy: a shallow copy shares the nested 'line' settings, so
    # colouring the legend entry black also recoloured the first real curve.
    pcd_legend_entry = copy.deepcopy(pcd_traces[0])
    pcd_legend_entry['legendgroup'] = 'method'
    pcd_legend_entry['name'] = 'PCD'
    pcd_legend_entry['line']['color'] = 'black'
    data.append(pcd_legend_entry)

    for trace in pcd_traces:
        trace['name'] = trace['name'].split("-")[-1].split("(")[0]
        trace['legendgroup'] = 'correction'
        data.append(trace)

    # add pLL traces
    pll_traces = subplots['pseudo-likelihood maximization']['data']
    pll_legend_entry = copy.deepcopy(pll_traces[0])
    pll_legend_entry['legendgroup'] = 'method'
    pll_legend_entry['name'] = 'pLL'
    pll_legend_entry['line']['color'] = 'black'
    pll_legend_entry['line']['dash'] = 'dot'
    pll_legend_entry['showlegend'] = True
    data.append(pll_legend_entry)

    for trace in pll_traces:
        trace['name'] = trace['name'].split("-")[-1].split("(")[0]
        trace['legendgroup'] = 'correction'
        trace['showlegend'] = False
        trace['line']['dash'] = 'dot'
        data.append(trace)

    layout = go.Layout(
        font=dict(size=18),
        hovermode='closest',
        title="",
        margin=dict(t=10),
        legend=dict(
            orientation="v",
            x=1.01,
            y=1.0
        ),
        yaxis=dict(
            title="Mean Precision over Proteins",
            range=[0, 1]
        ),
        xaxis=dict(
            title="#predicted contacts / protein length"
        ),
        height=height,
        width=width
    )

    fig = go.Figure(data=data, layout=layout)

    plot_file = plot_dir + "/" + "ccmgen_benchmark_figure_pll_vs_pcd.html"
    plotly_plot(fig, filename=plot_file, auto_open=False, show_link=False)

    return plot_file
def plot_ccmgen_benchmark_figure(subplots, plot_dir, height=500, width=1500):
    """Write the benchmark figure with one precision panel per tree topology.

    Args:
        subplots: mapping with the entries ``'star topology'`` and
            ``'binary topology'``, each holding a ``'data'`` list of traces
            with ``'name'``, ``'x'`` and ``'y'``. The traces are modified in
            place (name, legend settings, hover text).
        plot_dir: directory the HTML file is written to.
        height: figure height in pixels.
        width: figure width in pixels.

    Returns:
        Path of the written HTML file.
    """
    # only these two panels are drawn
    titles = ['star topology', 'binary topology']

    ## define subplot grid
    fig = tools.make_subplots(
        rows=1,
        cols=len(titles),
        subplot_titles=titles,
        horizontal_spacing=0.05,
        print_grid=False
    )

    ## add traces as subplots; only the first panel contributes legend entries
    for col, topology in enumerate(titles, 1):
        for trace in subplots[topology]['data']:
            trace['name'] = trace['name'].split("-")[-1].split("(")[0]
            trace['showlegend'] = (col == 1)
            trace['legendgroup'] = 'correction'
            trace['text'] = [
                "{0} ({1}) <br>x: {2} <br>y: {3}".format(
                    topology, trace['name'], trace['x'][i],
                    np.round(trace['y'][i], decimals=3))
                for i in range(len(trace['x']))]
            trace['hoverinfo'] = 'text'
            fig.append_trace(trace, 1, col)

    # increase subplot title font size
    annotations = list(fig['layout']['annotations'])
    for subtitle in annotations:
        subtitle['font']['size'] = 22
        subtitle['y'] = 1.03

    # add centered x-axis title
    annotations.append(
        dict(
            text="#predicted contacts / protein length",
            x=0.5,
            y=-0.15,
            xref='paper',
            yref='paper',
            showarrow=False,
            font=dict(size=22)
        )
    )
    # assign the whole list: in-place append() is not available when the
    # layout hands out its annotations as a tuple
    fig['layout']['annotations'] = annotations

    # define layout
    fig['layout'].update(
        font=dict(size=18),
        hovermode='closest',
        title="",
        margin=dict(t=40),
        legend=dict(
            orientation="v",
            x=1.0,
            y=1.0
        ),
        yaxis1=dict(
            title="Mean Precision over Proteins"
        ),
        height=height,
        width=width
    )

    # One x/y axis pair per panel. The previous upper bound was one too high
    # and touched a third axis pair that has no subplot.
    for i in range(1, len(titles) + 1):
        fig['layout']['yaxis' + str(i)].update(
            range=[0, 1],
            zeroline=False,
            tickvals=[0.1, 0.3, 0.5, 0.7, 0.9],
            showspikes=True
        )
        fig['layout']['xaxis' + str(i)].update(
            range=[0, 1],
            zeroline=False,
            tickvals=[0.1, 0.3, 0.5, 0.7, 0.9],
            showspikes=True
        )

    plot_file = plot_dir + "/" + "ccmgen_benchmark_figure.html"
    plotly_plot(fig, filename=plot_file, auto_open=False, link_text='')

    return plot_file