def show_poisson_views():
    """Show different views of a Poisson distribution.

    Draws three stacked panels for a Poisson distribution with parameter 10:
    the PMF and CDF evaluated at k = 0..24, and the PPF evaluated at 100
    evenly spaced probabilities in [0, 1]. The figure is then displayed
    with ``plt.show()``.
    """
    fig, ax = plt.subplots(3, 1)

    k = np.arange(25)
    # Deliberately not called "pd": that name is conventionally the pandas alias
    poisson_dist = stats.poisson(10)
    C2_8_mystyle.set(12)

    # Top panel: probability mass function
    ax[0].plot(k, poisson_dist.pmf(k), 'x-')
    ax[0].set_title('Poisson distribution')
    ax[0].set_xticklabels([])
    ax[0].set_ylabel('PMF (X)')

    # Middle panel: cumulative distribution function
    ax[1].plot(k, poisson_dist.cdf(k))
    ax[1].set_xlabel('X')
    ax[1].set_ylabel('CDF (X)')

    # Bottom panel: percent point function (inverse of the CDF)
    y = np.linspace(0, 1, 100)
    ax[2].plot(y, poisson_dist.ppf(y))
    ax[2].set_xlabel('X')
    ax[2].set_ylabel('PPF (X)')

    plt.tight_layout()
    plt.show()
def KS_principle(inData):
    '''Show the principle of the Kolmogorov-Smirnov test.

    Plots the CDF of a standard normal distribution (dashed line) together
    with the empirical CDF of the sample data (solid line), and marks the
    vertical distance between the two curves at the first grid point with
    x >= 2 by a double-headed arrow. The figure is handed to
    C2_8_mystyle.printout_plain under the name 'KS_Example.png'.

    Parameters
    ----------
    inData : sequence of numbers
        Sample data; the empirical CDF is evaluated on the fixed range 0..10.
    '''

    # CDF of normally distributed data
    nd = stats.norm()
    nd_x = np.linspace(-4, 4, 101)
    nd_y = nd.cdf(nd_x)

    # Empirical CDF of the sample data, which range from approximately 0 to 10
    numPts = 50
    lowerLim = 0
    upperLim = 10
    ecdf_x = np.linspace(lowerLim, upperLim, numPts)
    # Use the function argument here (the original read an undefined name "data")
    ecdf_y = stats.cumfreq(inData, numPts, (lowerLim, upperLim))[0]/len(inData)

    # Add zero-point by hand
    ecdf_x = np.hstack((0., ecdf_x))
    ecdf_y = np.hstack((0., ecdf_y))

    # Plot the data
    sns.set_style('ticks')
    sns.set_context('poster')
    C2_8_mystyle.set(36)

    # No plt.hold(True): it was removed from Matplotlib, and successive
    # plot calls draw into the same axes by default.
    plt.plot(nd_x, nd_y, 'k--')
    plt.plot(ecdf_x, ecdf_y, color='k')
    plt.xlabel('X')
    plt.ylabel('Cumulative Probability')

    # For the arrow, find the start
    ecdf_startIndex = np.min(np.where(ecdf_x >= 2))
    arrowStart = np.array([ecdf_x[ecdf_startIndex], ecdf_y[ecdf_startIndex]])

    nd_startIndex = np.min(np.where(nd_x >= 2))
    arrowEnd = np.array([nd_x[nd_startIndex], nd_y[nd_startIndex]])
    arrowDelta = arrowEnd - arrowStart

    # Two opposing vertical arrows give one double-headed arrow
    plt.arrow(arrowStart[0], arrowStart[1], 0, arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.02,
              head_width=0.2, color='k')
    plt.arrow(arrowStart[0], arrowStart[1]+arrowDelta[1], 0, -arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.02,
              head_width=0.2, color='k')

    outFile = 'KS_Example.png'
    C2_8_mystyle.printout_plain(outFile)
def generate_probplot(): '''Generate a prob-plot for a chi2-distribution of sample data''' # Define the skewed distribution chi2 = stats.chi2(3) # Generate the data x = np.linspace(0,10, 100) y = chi2.pdf(x) np.random.seed(12345) numData = 100 data = chi2.rvs(numData) # Arrange subplots sns.set_context('paper') sns.set_style('white') C2_8_mystyle.set(11) fig, axs = plt.subplots(1,2) # Plot distribution axs[0].plot(x,y) axs[0].set_xlabel('X') axs[0].set_ylabel('PDF(X)') axs[0].set_title('chi2(x), k=3') sns.set_style('white') x0, x1 = axs[0].get_xlim() y0, y1 = axs[0].get_ylim() axs[0].set_aspect((x1-x0)/(y1-y0)) #sns.despine() # Plot probplot plt.axes(axs[1]) stats.probplot(data, plot=plt) x0, x1 = axs[1].get_xlim() y0, y1 = axs[1].get_ylim() axs[1].axhline(0, lw=0.5, ls='--') axs[1].axvline(0, lw=0.5, ls='--') axs[1].set_aspect((x1-x0)/(y1-y0)) #sns.despine() C2_8_mystyle.printout_plain('chi2pp.png') return(data) '''
def main():
    '''Demonstrate the generation of different statistical standard plots

    Generates, one after the other: scatter plot, histograms, cumulative
    frequency plot, KDE-plot, boxplots, errorbars, violinplot, barplot,
    grouped boxplot, bivariate scatter plot and pie-plot. Each figure is
    handed to C2_8_mystyle.printout (or shown with plt.show()).
    '''

    # Univariate data -------------------------
    # Generate data that are normally distributed
    x = np.random.randn(500)

    # Set the fonts the way I like them
    sns.set_context('poster')
    sns.set_style('ticks')
    C2_8_mystyle.set(fs=32)

    # Scatter plot
    plt.scatter(np.arange(len(x)), x)
    plt.xlim([0, len(x)])

    # Save and show the data, in a systematic format
    C2_8_mystyle.printout('scatterPlot.png', xlabel='x', ylabel='y', title='Scatter')

    # Histogram
    plt.hist(x)
    C2_8_mystyle.printout('histogram_plain.png', xlabel='Data Values',
                          ylabel='Frequency', title='Histogram, default settings')

    plt.hist(x, 25)
    C2_8_mystyle.printout('histogram.png', xlabel='Data Values',
                          ylabel='Frequency', title='Histogram, 25 bins')

    # Cumulative probability density
    numbins = 20
    plt.plot(stats.cumfreq(x, numbins)[0])
    C2_8_mystyle.printout('CumulativeFrequencyFunction.png', xlabel='Data Values',
                          ylabel='CumFreq', title='Cumulative Frequency')

    # KDE-plot
    sns.kdeplot(x)
    C2_8_mystyle.printout('kde.png', xlabel='Data Values', ylabel='Density',
                          title='KDE_plot')

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(x, sym='*')
    C2_8_mystyle.printout('boxplot.png', xlabel='Values', title='Boxplot')

    plt.boxplot(x, sym='*', vert=False)
    plt.title('Boxplot, horizontal')
    plt.xlabel('Values')
    plt.show()

    # Errorbars
    x = np.arange(5)
    y = x**2
    errorBar = x/2
    plt.errorbar(x, y, yerr=errorBar, fmt='o', capsize=5, capthick=3)

    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    C2_8_mystyle.printout('Errorbars.png', xlabel='Data Values',
                          ylabel='Measurements', title='Errorbars')

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))

    nd2 = stats.norm(loc = 3, scale = 1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot
    df = pd.DataFrame({'Girls':data, 'Boys':data2})
    # Pass the frame by keyword: the meaning of the first positional
    # argument differs between seaborn versions
    sns.violinplot(data=df)

    C2_8_mystyle.printout('violinplot.png', title='Violinplot')

    # Barplot
    df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
    df.plot(kind='bar', grid=False)
    C2_8_mystyle.printout('barplot.png', title='Barplot')

    # Grouped Boxplot
    sns.set_style('whitegrid')
    sns.boxplot(data=df)
    C2_8_mystyle.set(fs=28)
    C2_8_mystyle.printout('groupedBoxplot.png', title='sns.boxplot')

    # Bivariate Plots
    df2 = pd.DataFrame(np.random.rand(50, 4), columns=['a', 'b', 'c', 'd'])
    # The marker sizes have to come from the frame that is plotted (df2, 50 rows),
    # not from the 10-row frame "df" of the barplot
    df2.plot(kind='scatter', x='a', y='b', s=df2['c']*300)
    C2_8_mystyle.printout('bivariate.png')

    # Pieplot
    series = pd.Series(3 * np.random.rand(4), index=['a', 'b', 'c', 'd'], name='series')
    oldPalette = sns.color_palette()    # remember the palette, to restore it afterwards
    sns.set_palette("husl")
    series.plot(kind='pie', figsize=(6, 6))
    C2_8_mystyle.printout('piePlot.png', title='pie-plot')
    sns.set_palette(oldPalette)
"miR-137" is a short non-coding RNA molecule that functions to regulate the expression levels of other genes. ''' # author: Thomas Haslwanter, date: Jun-2015 # Import standard packages import matplotlib.pyplot as plt import C2_8_mystyle as mystyle # additional packages from lifelines.datasets import load_waltons from lifelines import KaplanMeierFitter from lifelines.statistics import logrank_test # Set my favorite font mystyle.set() # Load and show the data df = load_waltons() # returns a Pandas DataFrame print(df.head()) ''' T E group 0 6 1 miR-137 1 13 1 miR-137 2 13 1 miR-137 3 13 1 miR-137 4 19 1 miR-137 ''' T = df['T']
# author: Thomas Haslwanter, date: July-2015

# Import standard packages
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns

import C2_8_mystyle

# Log-normal distribution (shape parameter 2), evaluated on a grid which is
# logarithmically spaced and shifted by a small offset
x_values = np.logspace(-9, 1, 1001) + 1e-9
pdf_values = stats.lognorm(2).pdf(x_values)

# Layout of the figure: two panels next to each other, with a common y-axis
sns.set_style('ticks')
C2_8_mystyle.set(18)
fig, (ax_linear, ax_log) = plt.subplots(1, 2, sharey=True)
sns.set_context('poster')

# Left panel: the PDF as a function of x
ax_linear.plot(x_values, pdf_values)
ax_linear.set_xlim(-0.1, 8)
ax_linear.set_xlabel('x')
ax_linear.set_ylabel('pdf(x)')

# Right panel: the same PDF, plotted against log(x)
ax_log.plot(np.log(x_values), pdf_values)
ax_log.set_xlim(-12, 5)
ax_log.set_xlabel('log(x)')

# Hand the figure over to the style module
C2_8_mystyle.printout_plain('logNormal.png')
# additional packages import C2_8_mystyle # Calculate the values nd = stats.norm() x = np.linspace(-3,3,100) yp = nd.pdf(x) y = nd.cdf(x) x1 = np.linspace(-3, 1) y1 = nd.pdf(x1) # Make the plot sns.set_context('paper') sns.set_style('white') C2_8_mystyle.set(12) figs, axs = plt.subplots(1,2) axs[0].plot(x,yp, 'k') axs[0].fill_between(x1, y1, facecolor='#CCCCCC') axs[0].text(0, 0.1, 'CDF(x)', family='cursive', fontsize=14, horizontalalignment='center', style='italic') axs[0].set_xlabel('x') axs[0].set_ylabel('PDF(x)') sns.despine() axs[1].plot(x, y, '#999999', lw=3) axs[1].set_xlabel('x') axs[1].set_ylabel('CDF(x)') plt.vlines(0, 0, 1, linestyles='--') sns.despine()
ax.xaxis.set_ticks([50,150,250]) ax.set_xticklabels(['Group1', 'Group2', 'Group3']) ax.yaxis.set_ticks([]) ax.set_title(title) grandMean = np.mean(groupMean) ax.axhline(grandMean, color='#999999') ax.plot([80, 220], [groupMean[1], groupMean[1]], '#999999') ax.plot([80, 120], [groupMean[1]+0.2, groupMean[1]+0.2], '#999999') ax.annotate('', xy=(210, grandMean), xytext=(210,groupMean[1]), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black')) ax.annotate('', xy=(90, groupMean[1]), xytext=(90,groupMean[1]+0.2), arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black')) ax.text(210, (grandMean + groupMean[1])/2., '$SS_{Treatment}$', fontsize=36) ax.text(90, groupMean[1]+0.1, '$SS_{Error}$', ha='right', fontsize=36) if __name__ == '__main__': centers = [5, 5.3, 4.7] np.random.seed(123) C2_8_mystyle.set(30) fig = plt.figure() ax = fig.add_subplot(111) std = 0.1 numData = 100 show_fig(0.1, ax, 'Sum-Squares') # Save and show C2_8_mystyle.printout_plain('anova_annotated.png')
def simplePlots():
    '''Demonstrate the generation of different statistical standard plots

    Generates, one after the other: scatter plot, histograms, cumulative
    frequency plot, KDE-plot, boxplots, errorbars, violinplot, barplot,
    bivariate scatter plot, grouped boxplot and pie-plot. Each figure is
    handed to C2_8_mystyle.printout / printout_plain (or shown with
    plt.show()). The random number generator is seeded, so the same data
    are generated on every call.
    '''

    # Univariate data -------------------------

    # Make sure that always the same random numbers are generated
    np.random.seed(1234)

    # Generate data that are normally distributed
    x = np.random.randn(500)

    # Other graphics settings
    sns.set(context='poster', style='ticks', palette=sns.color_palette('muted'))

    # Set the fonts the way I like them
    C2_8_mystyle.set(fs=32)

    # Scatter plot
    plt.scatter(np.arange(len(x)), x)
    plt.xlim([0, len(x)])

    # Save and show the data, in a systematic format
    C2_8_mystyle.printout('scatterPlot.png', xlabel='Datapoints', ylabel='Values',
                          title='Scatter')

    # Histogram
    plt.hist(x)
    C2_8_mystyle.printout('histogram_plain.png', xlabel='Data Values',
                          ylabel='Frequency', title='Histogram, default settings')

    plt.hist(x, 25)
    C2_8_mystyle.printout('histogram.png', xlabel='Data Values',
                          ylabel='Frequency', title='Histogram, 25 bins')

    # Cumulative probability density
    numbins = 20
    plt.plot(stats.cumfreq(x, numbins)[0])
    C2_8_mystyle.printout('CumulativeFrequencyFunction.png', xlabel='Data Values',
                          ylabel='CumFreq', title='Cumulative Frequency')

    # KDE-plot
    sns.kdeplot(x)
    C2_8_mystyle.printout('kde.png', xlabel='Data Values', ylabel='Density',
                          title='KDE_plot')

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(x, sym='*')
    C2_8_mystyle.printout('boxplot.png', xlabel='Values', title='Boxplot')

    plt.boxplot(x, sym='*', vert=False)
    plt.title('Boxplot, horizontal')
    plt.xlabel('Values')
    plt.show()

    # Errorbars
    x = np.arange(5)
    y = x**2
    errorBar = x/2
    plt.errorbar(x, y, yerr=errorBar, fmt='o', capsize=5, capthick=3)

    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    C2_8_mystyle.printout('Errorbars.png', xlabel='Data Values',
                          ylabel='Measurements', title='Errorbars')

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))

    nd2 = stats.norm(loc = 3, scale = 1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot
    df = pd.DataFrame({'Girls':data, 'Boys':data2})
    # Pass the frame by keyword: the meaning of the first positional
    # argument differs between seaborn versions
    sns.violinplot(data=df)

    C2_8_mystyle.printout('violinplot.png', title='Violinplot')

    # Barplot
    # The font-size is set such that the legend does not overlap with the data
    np.random.seed(1234)
    C2_8_mystyle.set(20)

    df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
    df.plot(kind='bar', grid=False, color=sns.color_palette('muted'))

    C2_8_mystyle.printout_plain('barplot.png')
    C2_8_mystyle.set(28)

    # Bivariate Plots
    df2 = pd.DataFrame(np.random.rand(50, 3), columns=['a', 'b', 'c'])
    df2.plot(kind='scatter', x='a', y='b', s=df2['c']*500)
    plt.axhline(0, ls='--', color='#999999')
    plt.axvline(0, ls='--', color='#999999')
    C2_8_mystyle.printout('bivariate.png')

    # Grouped Boxplot
    sns.set_style('whitegrid')
    sns.boxplot(data=df)
    C2_8_mystyle.set(fs=28)
    C2_8_mystyle.printout('groupedBoxplot.png', title='sns.boxplot')

    sns.set_style('ticks')

    # Pieplot
    txtLabels = 'Cats', 'Dogs', 'Frogs', 'Others'
    fractions = [45, 30, 15, 10]
    offsets = (0, 0.05, 0, 0)   # the second wedge ('Dogs') is pulled out slightly

    plt.pie(fractions, explode=offsets, labels=txtLabels,
            autopct='%1.1f%%', shadow=True, startangle=90,
            colors=sns.color_palette('muted'))
    plt.axis('equal')
    C2_8_mystyle.printout('piePlot.png', title=' ')
import C2_8_mystyle


def show_fig(std, ax, title):
    '''Draw three groups of normally distributed random data into one axis.

    The group centers ("centers") and the number of points per group
    ("numData") are read from the module-level variables, which are set in
    the main section below.

    Parameters
    ----------
    std : standard deviation, used for all three groups
    ax : axis into which the data are plotted
    title : title of the axis
    '''
    index_in_group = np.arange(numData)

    for group_nr in range(3):
        samples = stats.norm(centers[group_nr], std).rvs(numData)
        # Shift every group to the right by the number of points per group,
        # so that the groups are drawn next to each other
        ax.plot(group_nr*numData + index_in_group, samples, '.', ms=10)

    # One tick per group, in the middle of the corresponding data points
    ax.xaxis.set_ticks([50,150,250])
    ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
    ax.set_title(title)
    sns.despine()


if __name__ == '__main__':
    # Graphics settings
    sns.set_context('paper')
    sns.set_style('whitegrid')
    C2_8_mystyle.set(14)

    # Two panels next to each other: the same three group centers, once
    # with a small and once with a large standard deviation
    fig, axs = plt.subplots(1, 2)
    centers = [5, 5.3, 4.7]
    stds = [0.1, 2]
    numData = 100

    for sd, panel, label in zip((0.1, 2), axs, ('SD=0.1', 'SD=2.0')):
        show_fig(sd, panel, label)

    # Hand the figure over to the style module
    C2_8_mystyle.printout_plain('anova_oneway.png')