def graph_dendrogram_branches(dfWindow, ranWindow, names, fileName):
    """Save a dendrogram of the element AT content to 'Dendrogram_<fileName>.pdf'.

    The A/T group returned by collect_sum_two_nucleotides is transposed and
    sliced from (plotLineLocationThree - methylationflank) to
    (plotLineLocationFour + methylationflank); the slice is transposed back,
    clustered with linkage() and drawn with dendrogram().

    Parameters:
        dfWindow: element data handed to collect_sum_two_nucleotides
        ranWindow: random-region data; kept so existing callers keep
            working, but nothing in this plot uses it
        names: passed through to collect_sum_two_nucleotides
        fileName: used in the figure title and in the PDF file name
    """
    # Only the group is needed here; the mean and standard deviation are not plotted
    ATgroup, _, _ = collect_sum_two_nucleotides(dfWindow, names, 'A', 'T')
    flankStart = (GlobalVariables.plotLineLocationThree -
                  GlobalVariables.methylationflank)
    flankEnd = (GlobalVariables.plotLineLocationFour +
                GlobalVariables.methylationflank)
    ATelement = ATgroup.T[flankStart:flankEnd]
    print('Extracted just element and methylation flank, size {0}'.format(
        len(ATelement)))

    # Title info
    info = str(fileName) + ', ' + str(len(ATgroup.index)) + ' - UCES'

    # Plot settings
    sns.set_palette("husl", n_colors=8)
    # Built as a real list so the palette is also accepted where map() is lazy
    linkColors = [rgb2hex(color) for color in sns.color_palette()]
    set_link_color_palette(linkColors)
    sns.set_style('white')

    # http://nbviewer.jupyter.org/gist/vals/150ec97a5b7db9c82ee9
    link = linkage(ATelement.T)

    fig = plt.figure(figsize=(100, 10))
    pp = PdfPages('Dendrogram_{0}.pdf'.format(fileName))
    try:
        # The title is attached after the figure exists, otherwise it ends up
        # on whichever figure was current before and never reaches the PDF
        plt.suptitle(info, fontsize=10)
        # color_threshold is a numeric distance cut-off; the grey hex value is
        # a colour, so it is passed as the colour for links above the cut-off
        dendrogram(link,
                   labels=ATelement.T.index,
                   leaf_font_size=3,
                   above_threshold_color='#AAAAAA')
        plt.xticks(rotation=90, fontsize=8)
        sns.despine()
        plt.tight_layout()
        pp.savefig()
    finally:
        # Release the PDF handle and the very wide figure even if plotting fails
        pp.close()
        plt.close(fig)
def graph_k_means(dfWindow, ranWindow, names, fileName):
    """Save a k-means overview of the element AT content to 'Kmeans_<fileName>.pdf'.

    Each element is reduced to two values: the mean of the transposed AT
    group over plotLineLocationThree:plotLineLocationOne and over
    plotLineLocationTwo:plotLineLocationFour.  The top panel plots the
    second value returned by cluster.vq.kmeans for 1 through 9 requested
    clusters; the bottom panel scatters the two values coloured by the
    assignment from the run at index 3 (four requested clusters).

    Parameters:
        dfWindow: element data handed to collect_sum_two_nucleotides
        ranWindow: random-region data; kept so existing callers keep
            working, but nothing in this plot uses it
        names: passed through to collect_sum_two_nucleotides
        fileName: used in the figure title and in the PDF file name
    """
    # Parameters that all graphs will use
    fig = plt.figure(figsize=(7, 7))

    # Only the group is needed here; the mean and standard deviation are not plotted
    ATgroup, _, _ = collect_sum_two_nucleotides(dfWindow, names, 'A', 'T')
    flankStart = (GlobalVariables.plotLineLocationThree -
                  GlobalVariables.methylationflank)
    flankEnd = (GlobalVariables.plotLineLocationFour +
                GlobalVariables.methylationflank)
    ATelement = ATgroup.T[flankStart:flankEnd]
    print('Extracted just element and methylation flank, size {0}'.format(
        len(ATelement)))

    # Title info
    info = str(fileName) + ', ' + str(len(ATgroup.index)) + ' - UCES'

    # Plot settings
    sns.set_style('ticks')
    plt.suptitle(info, fontsize=10)
    sns.set_palette("husl", n_colors=8)

    # get the average first/last inset
    upinset = ATgroup.T[GlobalVariables.plotLineLocationThree:
                        GlobalVariables.plotLineLocationOne].mean()
    downinset = ATgroup.T[GlobalVariables.plotLineLocationTwo:
                          GlobalVariables.plotLineLocationFour].mean()
    # One [up, down] row per element
    ATarray = np.array([list(pair) for pair in zip(upinset, downinset)])
    print(ATarray)

    # https://stackoverflow.com/questions/42398403/python-k-means-clustering-array
    gs = gridspec.GridSpec(2, 1, height_ratios=[1, 1])
    gs.update(hspace=.5)

    # https://stats.stackexchange.com/questions/9850/how-to-plot-data-output-of-clustering
    # One run per requested cluster count, 1 through 9
    ATstdinitial = [cluster.vq.kmeans(ATarray, k) for k in range(1, 10)]

    pp = PdfPages('Kmeans_{0}.pdf'.format(fileName))
    try:
        ax0 = plt.subplot(gs[0, :])
        ax0.plot([distortion for (_, distortion) in ATstdinitial])

        # Index 3 is the run that asked for four clusters
        cent, _ = ATstdinitial[3]
        assignment, _ = cluster.vq.vq(ATarray, cent)
        ax1 = plt.subplot(gs[1, :])
        ax1.scatter(ATarray[:, 0], ATarray[:, 1], c=assignment)

        # The page has to be written explicitly; opening PdfPages alone
        # never puts the figure into the file
        pp.savefig()
    finally:
        pp.close()
        plt.close(fig)
def graph_interactive_bokeh(dfWindow, ranWindow, fileName):
    """Build the interactive AT content page 'Interactive_<fileName>.html'.

    The page stacks a row holding one Select widget, a line plot of the
    per-position mean of the element AT group, and a narrower line plot of
    its standard deviation that shares the x range of the first plot.  The
    random-region mean and standard deviation are stored in the data
    source as 'rmean' and 'rstd' but are not drawn.

    Parameters:
        dfWindow: element data handed to collect_sum_two_nucleotides
        ranWindow: random-region data handed to collect_sum_two_nucleotides
        fileName: used in the HTML file name
    """
    sns.set_palette("husl", n_colors=8)

    # Get group, mean and standard deviation for AT
    # NOTE(review): `names` is not a parameter of this function, so it has to
    # resolve to a module-level name at call time - confirm that one exists
    ATgroup, _, _ = collect_sum_two_nucleotides(dfWindow, names, 'A', 'T')
    ranATgroup, _, _ = collect_sum_two_nucleotides(ranWindow, names, 'A', 'T')

    columns = dict(x=GlobalVariables.fillX,
                   mean=ATgroup.mean(),
                   std=ATgroup.std(),
                   rmean=ranATgroup.mean(),
                   rstd=ranATgroup.std())
    source = ColumnDataSource(data=columns)
    output_file('Interactive_{0}.html'.format(fileName))

    p = figure(plot_width=1500,
               plot_height=600,
               min_border=10,
               min_border_left=50,
               toolbar_location="above",
               title="Mean AT Content Across Base Pair Position")
    p.line('x', 'mean', line_width=2, source=source)
    p.yaxis.axis_label = "% AT Content"
    p.xaxis.axis_label = "Nucleotide Position"
    p.background_fill_color = "#fafafa"

    # The list of choices is the `options` property of Select
    select = Select(title="Option:",
                    value="All",
                    options=["All", "Exonic", "Intronic", "Intergenic"])

    sd = figure(plot_width=1500,
                plot_height=200,
                x_range=p.x_range,
                min_border=10,
                min_border_left=50,
                title="Standard Deviation")
    sd.line('x', 'std', line_width=2, color='#3e1638', alpha=.5,
            source=source)
    sd.background_fill_color = "#fafafa"

    # Only the Select widget exists; no reverse-complement toggle is defined,
    # so it cannot be placed in the row
    widgets = row(select)
    show(column(widgets, p, sd))
def plot_line_graphs(elementDF, elementRC, collectDF, collectRC, allNames,
                     fileName, collectFile):
    """Save two stacked mean AT content line plots to 'Fangs_<fileName>.pdf'.

    The top panel draws the mean for elementDF as a thick line labelled
    'UCEs' plus one thin, translucent line per entry of collectDF.  The
    bottom panel does the same for elementRC and collectRC and shares its
    x axis with the top panel.

    Parameters:
        elementDF: element data for the top panel
        elementRC: element data for the bottom panel
        collectDF: iterable of data sets overlaid on the top panel
        collectRC: iterable of data sets overlaid on the bottom panel
        allNames: passed through to collect_sum_two_nucleotides
        fileName: used in the PDF file name
        collectFile: iterable paired with collectDF / collectRC; only its
            length matters, since pairing stops at the shorter of the two
    """
    boundaries = (
        (GlobalVariables.plotLineLocationOne, '#e7298a'),
        (GlobalVariables.plotLineLocationTwo, '#e7298a'),
        (GlobalVariables.plotLineLocationThree, '#bd4973'),
        (GlobalVariables.plotLineLocationFour, '#bd4973'),
    )

    def draw_panel(ax, elementMean, frames, title, labelspacing):
        # One thick element line, the overlays, then the dashed boundary lines
        ax.plot(GlobalVariables.fillX, elementMean, linewidth=2, label='UCEs')
        # zip is kept so that no more overlays are drawn than collectFile has entries
        for dfNuc, _ in zip(frames, collectFile):
            _, ATmean, _ = collect_sum_two_nucleotides(
                dfNuc, allNames, 'A', 'T')
            ax.plot(GlobalVariables.fillX, ATmean, linewidth=1, alpha=0.3)
        for location, color in boundaries:
            ax.axvline(x=location, linewidth=.05, linestyle='dashed',
                       color=color)
        ax.set_ylabel('% AT Content', size=8)
        ax.set_xlabel('Position', size=6)
        ax.set_title(title, size=8)
        # Keep every second y tick
        ax.set_yticks(ax.get_yticks()[::2])
        ax.legend(loc=0, fontsize=5, labelspacing=labelspacing)

    # Plot settings
    sns.set_style('ticks')
    gs = gridspec.GridSpec(2, 1, height_ratios=[1, 1])
    gs.update(hspace=.8)  # setting the space between the graphs
    fig = plt.figure(figsize=(7, 7))
    sns.set_palette("husl", n_colors=8)

    # Only the means are plotted; group and standard deviation are not used
    _, ATmeanElement, _ = collect_sum_two_nucleotides(
        elementDF, allNames, 'A', 'T')
    _, ATmeanElementRC, _ = collect_sum_two_nucleotides(
        elementRC, allNames, 'A', 'T')

    pp = PdfPages('Fangs_{0}.pdf'.format(fileName))
    try:
        # Mean AT content
        ax0 = plt.subplot(gs[0])
        draw_panel(ax0, ATmeanElement, collectDF, 'Mean AT Content', 0.1)
        ax0.set_xlim(0, GlobalVariables.num)

        # Mean AT content for the reverse complement sorted data
        ax1 = plt.subplot(gs[1], sharex=ax0)
        draw_panel(ax1, ATmeanElementRC, collectRC,
                   'Mean AT Content, Reverse Complement Sorted', 0.05)
        plt.setp(ax1.get_xticklabels(), visible=True)

        sns.despine()
        pp.savefig()
    finally:
        # Release the PDF handle and the figure even if plotting fails
        pp.close()
        plt.close(fig)
def graph_signal_lines(dfWindow, names, ranWindow, fileName):
    """Save the five-page signal analysis to 'Signal_<fileName>.pdf'.

    Pages, in order:
        1. smoothed mean AT content, its first derivative and its second
           derivative (with peaks from find_peaks_cwt), elements and
           random regions overlaid
        2. histogram of inflection point locations for both data sets
        3. continuous wavelet transform of the first derivative, one panel
           per data set
        4. short-time Fourier transform, first derivative and three FFT
           panels for the elements
        5. the same layout as page 4 for the random regions

    Parameters:
        dfWindow: element data handed to collect_sum_two_nucleotides
        names: passed through to collect_sum_two_nucleotides
        ranWindow: random-region data handed to collect_sum_two_nucleotides
        fileName: used in the figure title and in the PDF file name
    """
    fillX = GlobalVariables.fillX

    def mark_boundaries(ax, innerColor='#e7298a', outerColor='#bd4973'):
        # Dashed vertical lines at the four plot line locations
        for location, color in (
                (GlobalVariables.plotLineLocationOne, innerColor),
                (GlobalVariables.plotLineLocationTwo, innerColor),
                (GlobalVariables.plotLineLocationThree, outerColor),
                (GlobalVariables.plotLineLocationFour, outerColor)):
            ax.axvline(x=location, linewidth=.05, linestyle='dashed',
                       color=color)

    def thin_yticks(ax):
        # Keep every second y tick of the axis itself
        ax.set_yticks(ax.get_yticks()[::2])

    def plot_pair(ax, elementLine, randomLine, alpha):
        # Element and random-region curves on the same axis
        ax.plot(fillX, elementLine, linewidth=1, alpha=alpha, label='Element')
        ax.plot(fillX, randomLine, linewidth=1, alpha=alpha, label='Random')

    def plot_wavelet(ax, derivative, widths, endRange, label):
        # Wavelet transform drawn as an image with a colour scale symmetric around zero
        cwtmatr = signal.cwt(derivative, signal.ricker, widths)
        limit = abs(cwtmatr).max()
        ax.imshow(cwtmatr,
                  cmap='RdPu',
                  extent=[
                      0, (GlobalVariables.num - GlobalVariables.window), 1,
                      endRange
                  ],
                  aspect='auto',
                  vmax=limit,
                  vmin=-limit)
        ax.set_xlabel('Position', size=6)
        thin_yticks(ax)
        ax.set_title(
            'Continuous Wavelet Transformation Convolved Over Range {0}-{1} for the First Derivative, {2}'
            .format(widths[0], endRange, label),
            size=8)

    def plot_fourier_page(derivative, label):
        # One page: STFT on top, the first derivative with three shaded
        # regions below it, and an FFT panel per shaded region at the bottom
        gs = gridspec.GridSpec(3, 3, height_ratios=[2, 1, 1])
        gs.update(hspace=.65)

        # Short Fourier Transform
        axStft = plt.subplot(gs[0, :], sharex=ax0)
        sbins = 30
        freqs, times, Zxx = signal.stft(derivative,
                                        fs=1.0,
                                        window='hann',
                                        nperseg=sbins,
                                        noverlap=None)
        axStft.pcolormesh(times, freqs, np.abs(Zxx), cmap='RdPu')
        mark_boundaries(axStft, '#5fc85b', '#96c85b')
        axStft.set_ylabel('Frequency', size=8)
        axStft.set_xlabel('Position', size=6)
        thin_yticks(axStft)
        axStft.set_title(
            'Short Fourier Transform over {0} bins for {1}'.format(
                sbins, label),
            size=8)

        # First Derivative
        axDer = plt.subplot(gs[1, :], sharex=ax0)
        axDer.plot(fillX, derivative, linewidth=1)
        mark_boundaries(axDer)
        axDer.axvspan(GlobalVariables.window,
                      GlobalVariables.plotLineLocationThree,
                      label='',
                      alpha=0.1,
                      facecolor='#863eae')
        axDer.axvspan(GlobalVariables.plotLineLocationThree,
                      GlobalVariables.plotLineLocationFour,
                      label='',
                      alpha=0.1,
                      facecolor='#ae3e9e')
        axDer.axvspan(GlobalVariables.plotLineLocationFour,
                      (GlobalVariables.num - GlobalVariables.window -
                       GlobalVariables.window),
                      label='',
                      alpha=0.1,
                      facecolor='#ae3e66')
        thin_yticks(axDer)
        axDer.set_xlabel('Position', size=6)
        axDer.set_ylabel('Amplitude', size=8)
        axDer.set_title(
            'First Derivative of Fitted Mean for {0}'.format(label), size=8)

        frqElement, YElement = perform_fourier_transfroms(
            get_element_coordinates(derivative))
        frqUp, YUp = perform_fourier_transfroms(
            get_up_stream_flank_coordinates(derivative))
        frqDown, YDown = perform_fourier_transfroms(
            get_down_stream_flank_coordinates(derivative))
        print('Performed fourier transform for {0}'.format(label.lower()))

        # FFT for the three sections of the first derivative
        axUp = plt.subplot(gs[2, 0])
        axUp.plot(frqUp, abs(YUp), linewidth=1, color='#863eae')
        axUp.set_ylabel('|Y(freq)|', size=8)
        axUp.set_xlabel('Freq(Hz)', size=6)
        thin_yticks(axUp)
        # The other two panels hide their y tick labels, so they have to share
        # the y axis of the left panel on this same page
        axMid = plt.subplot(gs[2, 1], sharey=axUp)
        plt.setp(axMid.get_yticklabels(), visible=False)
        axMid.plot(frqElement, abs(YElement), linewidth=1, color='#ae3e9e')
        axMid.set_title(
            'Power Series for Highlighted Regions, {0}'.format(label), size=8)
        axMid.set_xlabel('Freq(Hz)', size=6)
        axDown = plt.subplot(gs[2, 2], sharey=axUp)
        plt.setp(axDown.get_yticklabels(), visible=False)
        axDown.plot(frqDown, abs(YDown), linewidth=1, color='#ae3e66')
        axDown.set_xlabel('Freq(Hz)', size=6)

        sns.despine()
        pp.savefig()
        print(
            'Plotted short fourier transform and fast fourier transform for {0}'
            .format(label.lower()))

    # Get group and mean for AT; the standard deviation is not plotted
    ATgroup, ATmean, _ = collect_sum_two_nucleotides(
        dfWindow, names, 'A', 'T')
    ranATgroup, ranATmean, _ = collect_sum_two_nucleotides(
        ranWindow, names, 'A', 'T')

    # Title info
    info = str(fileName) + ', ' + str(len(ATgroup.index)) + ' - UCES'

    # Get smoothed mean, first and second derivatives
    smoothMean, firstDer, secondDer = collect_smoothed_lines(ATmean)
    ransmoothMean, ranfirstDer, ransecondDer = collect_smoothed_lines(
        ranATmean)

    # Plot settings
    sns.set_style('ticks')
    sns.set_palette("husl", n_colors=8)
    fig = plt.figure(figsize=(7, 7))
    # The title is attached after the figure exists, otherwise it ends up on
    # whichever figure was current before and never reaches the PDF
    plt.suptitle(info, fontsize=10)

    # NOTE(review): every page is drawn on this one figure, which relies on
    # plt.subplot discarding earlier axes that a new one overlaps - confirm
    # against the installed matplotlib version
    pp = PdfPages('Signal_{0}.pdf'.format(fileName))
    try:
        gs = gridspec.GridSpec(3, 3, height_ratios=[1, 1, 1])
        gs.update(hspace=.65)

        # Plot smoothed mean AT
        ax0 = plt.subplot(gs[0, :])
        plot_pair(ax0, smoothMean, ransmoothMean, 0.9)
        mark_boundaries(ax0)
        thin_yticks(ax0)
        ax0.set_ylabel('% AT Content', size=8)
        ax0.set_title('Fitted Mean AT Content', size=8)

        # First derivative
        ax1 = plt.subplot(gs[1, :], sharex=ax0)
        plot_pair(ax1, firstDer, ranfirstDer, 0.8)
        mark_boundaries(ax1)
        ax1.axhline(y=0, linewidth=.1, alpha=0.3)
        thin_yticks(ax1)
        ax1.set_ylabel('Amplitude', size=8)
        ax1.set_title('First Derivative of Fitted Mean', size=8)

        # Second derivative
        ax2 = plt.subplot(gs[2, :], sharex=ax0)
        # asarray gives an integer index array whether a list or an array comes back
        peakMean = np.asarray(
            signal.find_peaks_cwt(secondDer, np.arange(1, 45)), dtype=int)
        print('Found peaks for elements second derivative')
        plot_pair(ax2, secondDer, ransecondDer, 0.7)
        ax2.scatter(peakMean, secondDer[peakMean], marker='.')
        mark_boundaries(ax2)
        ax2.axhline(y=0, linewidth=.1, alpha=0.3)
        ax2.set_ylabel('Amplitude', size=8)
        ax2.set_xlabel('Position', size=6)
        thin_yticks(ax2)
        ax2.set_title('Second Derivative of Fitted Mean', size=8)
        sns.despine()
        pp.savefig()
        print(
            'Plotted mean, first and second derivatives for elements and random regions'
        )

        # Frequency of inflection points
        gs = gridspec.GridSpec(1, 1, height_ratios=[1])
        ax3 = plt.subplot(gs[0])
        inflectionList, _ = locate_second_derivative_inflection_points(
            ATgroup)
        raninflectionList, _ = locate_second_derivative_inflection_points(
            ranATgroup)
        # Floor division keeps the bin count an integer under true division too
        IFbins = GlobalVariables.num // 10
        ax3.hist(inflectionList, IFbins, alpha=0.3, label='Element')
        ax3.hist(raninflectionList, IFbins, alpha=0.3, label='Random')
        mark_boundaries(ax3)
        thin_yticks(ax3)
        ax3.set_ylabel('Frequency', size=8)
        ax3.legend(loc=0, fontsize=5, labelspacing=0.1)
        ax3.set_xlabel('Inflection Point Location', size=8)
        sns.despine()
        pp.savefig()
        print(
            'Plotted inflection point frequency for elements and random regions'
        )

        # Continuous wavelet transform of the first derivative
        gs = gridspec.GridSpec(2, 1, height_ratios=[1, 1])
        gs.update(hspace=.65)
        endRange = 25
        widths = np.arange(1, endRange)
        ax5 = plt.subplot(gs[0], sharex=ax0)
        plot_wavelet(ax5, firstDer, widths, endRange, 'Element')
        ax6 = plt.subplot(gs[1], sharex=ax0)
        plot_wavelet(ax6, ranfirstDer, widths, endRange, 'Random Regions')
        sns.despine()
        pp.savefig()
        print(
            'Plotted continuous wavelet transformation for elements and random regions'
        )

        # Short Fourier transform and FFT pages, elements then random regions
        plot_fourier_page(firstDer, 'Elements')
        plot_fourier_page(ranfirstDer, 'Random Regions')
    finally:
        # Release the PDF handle and the figure even if plotting fails
        pp.close()
        plt.close(fig)
def graph_cluster(dfWindow, ranWindow, pdMeth, rnMeth, names, fileName): plt.figure(figsize=(7, 7)) # Get group, mean and standard deviation for AT ATgroup, ATmean, ATstd = collect_sum_two_nucleotides( dfWindow, names, 'A', 'T') ranATgroup, ranATmean, ranATstd = collect_sum_two_nucleotides( ranWindow, names, 'A', 'T') ATelement = ATgroup.T[(GlobalVariables.plotLineLocationThree - GlobalVariables.methylationflank):( GlobalVariables.plotLineLocationFour + GlobalVariables.methylationflank)] ranATelement = ranATgroup.T[(GlobalVariables.plotLineLocationThree - GlobalVariables.methylationflank):( GlobalVariables.plotLineLocationFour + GlobalVariables.methylationflank)] print 'Extracted just element and methylation flank, size {0}'.format( len(ATelement)) # Title info info = str(fileName) + ', ' + str(len(ATgroup.index)) + ' - ' "UCES" # Plot settings sns.set_style('ticks') plt.suptitle(info, fontsize=10) pp = PdfPages('Cluster_{0}.pdf'.format(fileName)) sns.set_palette("husl", n_colors=8) #(len(nucLine)*2) # Use the row_colors to color those with similar SD? 
huslPalette = sns.husl_palette(8, s=.45) elementColors, positionColors = make_dictionary_for_colors( ATelement, huslPalette) heatmap0 = sns.clustermap(ATelement.T, cmap='RdPu', vmin=0, vmax=100, xticklabels=50, col_cluster=False, row_colors=elementColors, col_colors=positionColors) plt.setp(heatmap0.ax_heatmap.tick_params(labelsize=8)) plt.setp(heatmap0.ax_heatmap.set_yticks([])) plt.setp(heatmap0.ax_heatmap.yaxis.tick_right()) plt.setp( heatmap0.ax_heatmap.set_ylabel('{0} UCEs'.format(len( ATelement.T.index)), size=8)) plt.setp(heatmap0.ax_heatmap.set_xlabel('Position', size=10)) plt.setp(heatmap0.ax_heatmap.tick_params(labelsize=10)) plt.setp( heatmap0.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationOneFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap0.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationTwoFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap0.ax_heatmap.axvline( x=GlobalVariables.plotLineLocationThreeFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap0.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationFourFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap0.ax_heatmap.set_title('Mean AT Content per Element', size=12)) # ATOrdered = heatmap0.dendrogram_row.reordered_ind sns.despine() pp.savefig() # Use the row_colors to color those with similar SD? 
ranelementColors, ranpositionColors = make_dictionary_for_colors( ranATelement, huslPalette) heatmap1 = sns.clustermap(ranATelement.T, cmap='RdPu', vmin=0, vmax=100, xticklabels=50, col_cluster=False, row_colors=ranelementColors, col_colors=ranpositionColors) plt.setp(heatmap1.ax_heatmap.tick_params(labelsize=8)) plt.setp(heatmap1.ax_heatmap.set_yticks([])) plt.setp(heatmap1.ax_heatmap.yaxis.tick_right()) plt.setp( heatmap1.ax_heatmap.set_ylabel('{0} UCEs'.format( len(ranATelement.T.index)), size=8)) plt.setp(heatmap1.ax_heatmap.set_xlabel('Position', size=10)) plt.setp(heatmap1.ax_heatmap.tick_params(labelsize=10)) plt.setp( heatmap1.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationOneFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap1.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationTwoFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap1.ax_heatmap.axvline( x=GlobalVariables.plotLineLocationThreeFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap1.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationFourFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap1.ax_heatmap.set_title('Mean AT Content per Random Region', size=12)) # ranATOrdered = heatmap1.dendrogram_row.reordered_ind sns.despine() pp.savefig() print 'Plotted cluster plot for mean AT content for all elements and random regions' # Various combinations to plot on heatmaps, just for element plus methylation flanks # Frequency x Tissue x ID X Location FreqPlusID, FreqMinusID = collect_methylation_by_index(pdMeth, 'id') FreqPlusTis, FreqMinusTis = collect_methylation_by_index(pdMeth, 'tissue') XPlus, XMinus = collect_tissue_by_id_dataframe(pdMeth, 'id', 'tissue') ranFreqPlusID, ranFreqMinusID = collect_methylation_by_index(rnMeth, 'id') ranFreqPlusTis, ranFreqMinusTis = collect_methylation_by_index( rnMeth, 'tissue') ranXPlus, ranXMinus = 
collect_tissue_by_id_dataframe( rnMeth, 'id', 'tissue') # Remove UCEs with out methylation within the element - only for ID group FreqPlusID = FreqPlusID[(FreqPlusID.T != 0).any()] FreqMinusID = FreqMinusID[(FreqMinusID.T != 0).any()] ranFreqPlusID = ranFreqPlusID[(ranFreqPlusID.T != 0).any()] ranFreqMinusID = ranFreqMinusID[(ranFreqMinusID.T != 0).any()] # Make heatmap for # methylation on pos strand (Frequency) heatmap2 = sns.clustermap(FreqPlusTis, cmap='RdPu', xticklabels=50, col_cluster=False) ylabels2 = heatmap2.ax_heatmap.get_yticklabels() plt.setp(heatmap2.ax_heatmap.set_yticklabels(ylabels2, rotation=0)) plt.setp(heatmap2.ax_heatmap.yaxis.tick_right()) plt.setp(heatmap2.ax_heatmap.set_ylabel('Sample', size=10)) plt.setp(heatmap2.ax_heatmap.set_xlabel('Position', size=10)) plt.setp(heatmap2.ax_heatmap.tick_params(labelsize=10)) plt.setp( heatmap2.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationOneFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap2.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationTwoFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap2.ax_heatmap.axvline( x=GlobalVariables.plotLineLocationThreeFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap2.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationFourFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap2.ax_heatmap.set_title( 'Methylation Frequency on Plus Strand for Elements', size=12)) sns.despine() pp.savefig() # Make heatmap for # methylation on pos strand (Frequency) heatmap3 = sns.clustermap(FreqMinusTis, cmap='RdPu', xticklabels=50, col_cluster=False) ylabels3 = heatmap3.ax_heatmap.get_yticklabels() plt.setp(heatmap3.ax_heatmap.set_yticklabels(ylabels3, rotation=0)) plt.setp(heatmap3.ax_heatmap.yaxis.tick_right()) plt.setp(heatmap3.ax_heatmap.set_ylabel('Sample', size=10)) plt.setp(heatmap3.ax_heatmap.set_xlabel('Position', size=10)) 
plt.setp(heatmap3.ax_heatmap.tick_params(labelsize=10)) plt.setp( heatmap3.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationOneFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap3.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationTwoFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap3.ax_heatmap.axvline( x=GlobalVariables.plotLineLocationThreeFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap3.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationFourFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap3.ax_heatmap.set_title( 'Methylation Frequency on Minus Strand for Elements', size=12)) sns.despine() pp.savefig() print 'Plotted methylation frequency for tissue types x position, for element' # Make heatmap for # methylation on pos strand (Frequency) heatmap4 = sns.clustermap(ranFreqPlusTis, cmap='RdPu', xticklabels=50, col_cluster=False) ylabels4 = heatmap4.ax_heatmap.get_yticklabels() plt.setp(heatmap4.ax_heatmap.set_yticklabels(ylabels4, rotation=0)) plt.setp(heatmap4.ax_heatmap.yaxis.tick_right()) plt.setp(heatmap4.ax_heatmap.set_ylabel('Sample', size=10)) plt.setp(heatmap4.ax_heatmap.set_xlabel('Position', size=10)) plt.setp(heatmap4.ax_heatmap.tick_params(labelsize=10)) plt.setp( heatmap4.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationOneFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap4.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationTwoFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap4.ax_heatmap.axvline( x=GlobalVariables.plotLineLocationThreeFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap4.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationFourFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap4.ax_heatmap.set_title( 'Methylation Frequency on Plus 
Strand for Random Regions', size=12)) sns.despine() pp.savefig() # Make heatmap for # methylation on pos strand (Frequency) heatmap5 = sns.clustermap(ranFreqMinusTis, cmap='RdPu', xticklabels=50, col_cluster=False) ylabels5 = heatmap5.ax_heatmap.get_yticklabels() plt.setp(heatmap5.ax_heatmap.set_yticklabels(ylabels5, rotation=0)) plt.setp(heatmap5.ax_heatmap.yaxis.tick_right()) plt.setp(heatmap5.ax_heatmap.set_ylabel('Sample', size=10)) plt.setp(heatmap5.ax_heatmap.set_xlabel('Position', size=10)) plt.setp(heatmap5.ax_heatmap.tick_params(labelsize=10)) plt.setp( heatmap5.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationOneFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap5.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationTwoFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap5.ax_heatmap.axvline( x=GlobalVariables.plotLineLocationThreeFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap5.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationFourFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap5.ax_heatmap.set_title( 'Methylation Frequency on Minus Strand for Random Regions', size=12)) sns.despine() pp.savefig() print 'Plotted methylation frequency for tissue types x position, for random regions' # Make heatmap for # methylation on pos strand (Frequency) heatmap6 = sns.clustermap(FreqPlusID, cmap='RdPu', xticklabels=50, col_cluster=False) ylabels6 = heatmap6.ax_heatmap.get_yticklabels() plt.setp(heatmap6.ax_heatmap.set_yticklabels(ylabels6, rotation=0)) plt.setp(heatmap6.ax_heatmap.yaxis.tick_right()) plt.setp( heatmap6.ax_heatmap.set_ylabel('{0} Elements'.format( len(FreqPlusID.index)), size=10)) plt.setp(heatmap6.ax_heatmap.set_xlabel('Position', size=10)) plt.setp(heatmap6.ax_heatmap.tick_params(labelsize=10)) plt.setp( heatmap6.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationOneFull, linewidth=.05, 
linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap6.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationTwoFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap6.ax_heatmap.axvline( x=GlobalVariables.plotLineLocationThreeFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap6.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationFourFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap6.ax_heatmap.set_title( 'Methylation Frequency on Plus Strand for Elements', size=12)) sns.despine() pp.savefig() # Make heatmap for # methylation on neg strand (Frequency) heatmap7 = sns.clustermap(FreqMinusID, cmap='RdPu', xticklabels=50, col_cluster=False) ylabels7 = heatmap7.ax_heatmap.get_yticklabels() plt.setp(heatmap7.ax_heatmap.set_yticklabels(ylabels7, rotation=0)) plt.setp(heatmap7.ax_heatmap.yaxis.tick_right()) plt.setp( heatmap7.ax_heatmap.set_ylabel('{0} Elements'.format( len(FreqMinusID.index)), size=10)) plt.setp(heatmap7.ax_heatmap.set_xlabel('Position', size=10)) plt.setp(heatmap7.ax_heatmap.tick_params(labelsize=10)) plt.setp( heatmap7.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationOneFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap7.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationTwoFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap7.ax_heatmap.axvline( x=GlobalVariables.plotLineLocationThreeFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap7.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationFourFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap7.ax_heatmap.set_title( 'Methylation Frequency on Minus Strand for Elements', size=12)) sns.despine() pp.savefig() print 'Plotted methylation frequency for element x position , element' # Make heatmap for # methylation on pos strand (Frequency) 
heatmap8 = sns.clustermap(ranFreqPlusID, cmap='RdPu', xticklabels=50, col_cluster=False) ylabels8 = heatmap8.ax_heatmap.get_yticklabels() plt.setp(heatmap8.ax_heatmap.set_yticks([])) plt.setp(heatmap8.ax_heatmap.yaxis.tick_right()) plt.setp( heatmap8.ax_heatmap.set_ylabel('{0} Elements'.format( len(ranFreqPlusID.index)), size=10)) plt.setp(heatmap8.ax_heatmap.set_xlabel('Position', size=10)) plt.setp(heatmap8.ax_heatmap.tick_params(labelsize=10)) plt.setp( heatmap8.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationOneFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap8.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationTwoFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap8.ax_heatmap.axvline( x=GlobalVariables.plotLineLocationThreeFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap8.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationFourFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap8.ax_heatmap.set_title( 'Methylation Frequency on Plus Strand for Random Regions', size=12)) sns.despine() pp.savefig() # Make heatmap for # methylation on neg strand (Frequency) heatmap9 = sns.clustermap(ranFreqMinusID, cmap='RdPu', xticklabels=50, col_cluster=False) ylabels9 = heatmap9.ax_heatmap.get_yticklabels() plt.setp(heatmap9.ax_heatmap.set_yticks([])) plt.setp(heatmap9.ax_heatmap.yaxis.tick_right()) plt.setp( heatmap9.ax_heatmap.set_ylabel('{0} Elements'.format( len(ranFreqMinusID.index)), size=10)) plt.setp(heatmap9.ax_heatmap.set_xlabel('Position', size=10)) plt.setp(heatmap9.ax_heatmap.tick_params(labelsize=10)) plt.setp( heatmap9.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationOneFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap9.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationTwoFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( 
heatmap9.ax_heatmap.axvline( x=GlobalVariables.plotLineLocationThreeFull, linewidth=.05, linestyle='dashed', color='#5fc85b', alpha=0.5)) plt.setp( heatmap9.ax_heatmap.axvline(x=GlobalVariables.plotLineLocationFourFull, linewidth=.05, linestyle='dashed', color='#96c85b', alpha=0.5)) plt.setp( heatmap9.ax_heatmap.set_title( 'Methylation Frequency on Minus StrandStrand for Random Regions', size=12)) sns.despine() pp.savefig() print 'Plotted methylation frequency for element x position , random regions' # Make heatmap for # methylation on pos strand (Frequency) heatmap10 = sns.clustermap(XPlus, cmap='RdPu') ylabels10 = heatmap10.ax_heatmap.get_yticklabels() plt.setp(heatmap10.ax_heatmap.set_yticklabels(ylabels10, rotation=0)) plt.setp(heatmap10.ax_heatmap.yaxis.tick_right()) plt.setp( heatmap10.ax_heatmap.set_ylabel('{0} Elements'.format( len(FreqPlusID.index)), size=10)) plt.setp(heatmap10.ax_heatmap.set_xlabel('Sample', size=10)) plt.setp(heatmap10.ax_heatmap.tick_params(labelsize=10)) plt.setp( heatmap10.ax_heatmap.set_title( 'Methylation Frequency on Plus Strand for Elements', size=12)) sns.despine() pp.savefig() # Make heatmap for # methylation on neg strand (Frequency) heatmap11 = sns.clustermap(XMinus, cmap='RdPu') ylabels11 = heatmap11.ax_heatmap.get_yticklabels() plt.setp(heatmap11.ax_heatmap.set_yticklabels(ylabels11, rotation=0)) plt.setp(heatmap11.ax_heatmap.yaxis.tick_right()) plt.setp( heatmap11.ax_heatmap.set_ylabel('{0} Elements'.format( len(FreqMinusID.index)), size=10)) plt.setp(heatmap11.ax_heatmap.set_xlabel('Sample', size=10)) plt.setp(heatmap11.ax_heatmap.tick_params(labelsize=10)) plt.setp( heatmap11.ax_heatmap.set_title( 'Methylation Frequency on Minus Strand for Elements', size=12)) sns.despine() pp.savefig() print 'Plotted methylation frequency for element x tissue type , element' # Make heatmap for # methylation on pos strand (Frequency) heatmap12 = sns.clustermap(ranXPlus, cmap='RdPu') ylabels12 = heatmap12.ax_heatmap.get_yticklabels() 
plt.setp(heatmap12.ax_heatmap.set_yticks([])) plt.setp(heatmap12.ax_heatmap.yaxis.tick_right()) plt.setp( heatmap12.ax_heatmap.set_ylabel('{0} Elements'.format( len(ranFreqPlusID.index)), size=10)) plt.setp(heatmap12.ax_heatmap.set_xlabel('Sample', size=10)) plt.setp(heatmap12.ax_heatmap.tick_params(labelsize=10)) plt.setp( heatmap12.ax_heatmap.set_title( 'Methylation Frequency on Plus Strand for Random Regions', size=12)) sns.despine() pp.savefig() # Make heatmap for # methylation on neg strand (Frequency) heatmap13 = sns.clustermap(ranXMinus, cmap='RdPu') ylabels13 = heatmap13.ax_heatmap.get_yticklabels() plt.setp(heatmap13.ax_heatmap.set_yticks([])) plt.setp(heatmap13.ax_heatmap.yaxis.tick_right()) plt.setp( heatmap13.ax_heatmap.set_ylabel('{0} Elements'.format( len(ranFreqMinusID.index)), size=10)) plt.setp(heatmap13.ax_heatmap.set_xlabel('Sample', size=10)) plt.setp(heatmap13.ax_heatmap.tick_params(labelsize=10)) plt.setp( heatmap13.ax_heatmap.set_title( 'Methylation Frequency on Minus Strand for Random Regions', size=12)) sns.despine() pp.savefig() print 'Plotted methylation frequency for element x position , random regions' # #put the index in a list # UCEindex = ATelement.T.index.tolist() # RANindex = ranATelement.T.index.tolist() # # reorder index based on clustering # ATsorted = [UCEindex[i] for i in ATOrdered] # RANsorted = [RANindex[i] for i in ranATOrdered] # # GraphTableLibrary.main(ATOrdered,ranATOrdered,'Cluster_{0}'.format(fileName)) # print 'Created table for re-ordered mean AT cluster data' sns.despine() pp.savefig() pp.close()