def smSolution(M1, M2, M3):
    '''Fit three OLS models with statsmodels, print their summaries, and plot the fits.

    M1, M2, M3 are each passed as the second argument to sm.OLS(); judging by
    the plot labels they are presumably the design matrices of a linear, a
    quadratic and a cubic model (confirm against the caller).

    NOTE: the data vectors "x" and "y", as well as "plt", are not parameters;
    they are taken from the enclosing (module) scope.
    '''

    import statsmodels.api as sm
    import C2_8_mystyle

    # Fit all three models first, then print the summaries in the same order
    fits = [sm.OLS(y, design).fit() for design in (M1, M2, M3)]
    for fit in fits:
        print(fit.summary2())

    # Plot the data
    plt.plot(x, y, '.', label='Data')

    # One (line format, legend label) pair per fitted model
    curve_specs = (
        ('r--', 'Linear Fit'),
        ('g', 'Quadratic Fit'),
        ('y', 'Cubic Fit'),
    )
    for fit, (line_format, legend_label) in zip(fits, curve_specs):
        plt.plot(x, fit.fittedvalues, line_format, label=legend_label)

    plt.legend(loc='upper left', shadow=True)
    C2_8_mystyle.printout('linearModel.png', xlabel='x', ylabel='y')
def main():
    '''Demonstrate the generation of different statistical standard plots.

    Draws, one after the other: scatter plot, two histograms, cumulative
    frequency, KDE plot, vertical and horizontal boxplot, errorbars,
    violinplot, barplot, grouped boxplot, bivariate scatter plot and pie plot.
    Every figure except the horizontal boxplot is handed to
    C2_8_mystyle.printout() together with an output file name; the horizontal
    boxplot is only displayed with plt.show().

    The random data are not seeded here, so the figures differ between runs.
    Takes no arguments and returns nothing.
    '''

    # Univariate data -------------------------

    # Generate data that are normally distributed
    x = np.random.randn(500)

    # Set the fonts the way I like them
    sns.set_context('poster')
    sns.set_style('ticks')
    C2_8_mystyle.set(fs=32)

    # Scatter plot
    plt.scatter(np.arange(len(x)), x)
    plt.xlim([0, len(x)])

    # Save and show the data, in a systematic format
    C2_8_mystyle.printout('scatterPlot.png', xlabel='x', ylabel='y',
                          title='Scatter')

    # Histogram
    plt.hist(x)
    C2_8_mystyle.printout('histogram_plain.png', xlabel='Data Values',
                          ylabel='Frequency',
                          title='Histogram, default settings')

    plt.hist(x, 25)
    C2_8_mystyle.printout('histogram.png', xlabel='Data Values',
                          ylabel='Frequency', title='Histogram, 25 bins')

    # Cumulative frequency: element [0] of the cumfreq result holds the
    # cumulative bin counts
    numbins = 20
    plt.plot(stats.cumfreq(x, numbins)[0])
    # Title spelling fixed ("Frequncy" -> "Frequency"), matching simplePlots()
    C2_8_mystyle.printout('CumulativeFrequencyFunction.png',
                          xlabel='Data Values', ylabel='CumFreq',
                          title='Cumulative Frequency')

    # KDE-plot
    sns.kdeplot(x)
    C2_8_mystyle.printout('kde.png', xlabel='Data Values', ylabel='Density',
                          title='KDE_plot')

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(x, sym='*')
    C2_8_mystyle.printout('boxplot.png', xlabel='Values', title='Boxplot')

    plt.boxplot(x, sym='*', vert=False)
    plt.title('Boxplot, horizontal')
    plt.xlabel('Values')
    plt.show()

    # Errorbars
    x = np.arange(5)
    y = x**2
    errorBar = x/2
    plt.errorbar(x, y, yerr=errorBar, fmt='o', capsize=5, capthick=3)
    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    C2_8_mystyle.printout('Errorbars.png', xlabel='Data Values',
                          ylabel='Measurements', title='Errorbars')

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))

    nd2 = stats.norm(loc=3, scale=1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot
    df = pd.DataFrame({'Girls': data, 'Boys': data2})
    sns.violinplot(df)
    C2_8_mystyle.printout('violinplot.png', title='Violinplot')

    # Barplot
    df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
    df.plot(kind='bar', grid=False)
    C2_8_mystyle.printout('barplot.png', title='Barplot')

    # Grouped Boxplot (uses the same 10x4 frame as the barplot)
    sns.set_style('whitegrid')
    sns.boxplot(df)
    C2_8_mystyle.set(fs=28)
    C2_8_mystyle.printout('groupedBoxplot.png', title='sns.boxplot')

    # Bivariate Plots
    df2 = pd.DataFrame(np.random.rand(50, 4), columns=['a', 'b', 'c', 'd'])
    # The marker sizes must come from the frame that is being plotted (df2,
    # 50 rows). The 10-row barplot frame "df" does not match the 50 points.
    df2.plot(kind='scatter', x='a', y='b', s=df2['c']*300)
    C2_8_mystyle.printout('bivariate.png')

    # Pieplot
    series = pd.Series(3 * np.random.rand(4), index=['a', 'b', 'c', 'd'],
                       name='series')

    # Switch the palette only for the pie plot, and restore the previous one
    # even if plotting fails
    oldPalette = sns.color_palette()
    sns.set_palette("husl")
    try:
        series.plot(kind='pie', figsize=(6, 6))
        C2_8_mystyle.printout('piePlot.png', title='pie-plot')
    finally:
        sns.set_palette(oldPalette)
def simplePlots():
    '''Walk through a set of standard statistical plots, one figure at a time.

    The NumPy random generator is seeded with 1234 at the start and again
    before the barplot data are drawn. Every figure except the horizontal
    boxplot is passed to C2_8_mystyle.printout() / printout_plain() with an
    output file name; the horizontal boxplot is only displayed with
    plt.show(). Takes no arguments and returns nothing.
    '''

    # Univariate data -------------------------

    # Fixed seed, so that the same random numbers are generated on every run
    np.random.seed(1234)

    # Normally distributed sample
    samples = np.random.randn(500)
    n_samples = len(samples)

    # General graphics settings, followed by the preferred font size
    muted_for_setup = sns.color_palette('muted')
    sns.set(context='poster', style='ticks', palette=muted_for_setup)
    C2_8_mystyle.set(fs=32)

    # --- Scatter plot ---
    plt.scatter(np.arange(n_samples), samples)
    plt.xlim([0, n_samples])
    # Save and show the data, in a systematic format
    C2_8_mystyle.printout('scatterPlot.png',
                          xlabel='Datapoints',
                          ylabel='Values',
                          title='Scatter')

    # --- Histograms: default binning first, then 25 bins ---
    hist_axes = {'xlabel': 'Data Values', 'ylabel': 'Frequency'}

    plt.hist(samples)
    C2_8_mystyle.printout('histogram_plain.png',
                          title='Histogram, default settings',
                          **hist_axes)

    plt.hist(samples, 25)
    C2_8_mystyle.printout('histogram.png',
                          title='Histogram, 25 bins',
                          **hist_axes)

    # --- Cumulative frequency ---
    bin_count = 20
    cumulative_counts = stats.cumfreq(samples, bin_count)[0]
    plt.plot(cumulative_counts)
    C2_8_mystyle.printout('CumulativeFrequencyFunction.png',
                          xlabel='Data Values',
                          ylabel='CumFreq',
                          title='Cumulative Frequency')

    # --- KDE-plot ---
    sns.kdeplot(samples)
    C2_8_mystyle.printout('kde.png',
                          xlabel='Data Values',
                          ylabel='Density',
                          title='KDE_plot')

    # --- Boxplots ---
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(samples, sym='*')
    C2_8_mystyle.printout('boxplot.png', xlabel='Values', title='Boxplot')

    # The horizontal version is only shown on screen
    plt.boxplot(samples, sym='*', vert=False)
    plt.title('Boxplot, horizontal')
    plt.xlabel('Values')
    plt.show()

    # --- Errorbars ---
    positions = np.arange(5)
    measurements = positions**2
    half_widths = positions/2
    plt.errorbar(positions, measurements,
                 yerr=half_widths, fmt='o', capsize=5, capthick=3)
    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    C2_8_mystyle.printout('Errorbars.png',
                          xlabel='Data Values',
                          ylabel='Measurements',
                          title='Errorbars')

    # --- Violinplot ---
    # Standard normal for the first group, shifted and wider normal for the second
    standard_normal = stats.norm
    girls = standard_normal.rvs(size=100)

    shifted_normal = stats.norm(loc=3, scale=1.5)
    boys = shifted_normal.rvs(size=100)

    # pandas collects the two groups, seaborn draws the violins
    groups = pd.DataFrame({'Girls': girls, 'Boys': boys})
    sns.violinplot(groups)
    C2_8_mystyle.printout('violinplot.png', title='Violinplot')

    # --- Barplot ---
    # The font-size is set such that the legend does not overlap with the data
    np.random.seed(1234)
    C2_8_mystyle.set(20)
    column_names = ['a', 'b', 'c', 'd']
    bar_values = np.random.rand(10, 4)
    bars = pd.DataFrame(bar_values, columns=column_names)
    bars.plot(kind='bar', grid=False, color=sns.color_palette('muted'))
    C2_8_mystyle.printout_plain('barplot.png')
    C2_8_mystyle.set(28)

    # --- Bivariate plot ---
    points = pd.DataFrame(np.random.rand(50, 3), columns=['a', 'b', 'c'])
    marker_sizes = points['c']*500
    points.plot(kind='scatter', x='a', y='b', s=marker_sizes)
    # Dashed grey reference lines through the origin: horizontal, then vertical
    for draw_reference_line in (plt.axhline, plt.axvline):
        draw_reference_line(0, ls='--', color='#999999')
    C2_8_mystyle.printout('bivariate.png')

    # --- Grouped boxplot (re-uses the barplot data) ---
    sns.set_style('whitegrid')
    sns.boxplot(bars)
    C2_8_mystyle.set(fs=28)
    C2_8_mystyle.printout('groupedBoxplot.png', title='sns.boxplot')
    sns.set_style('ticks')

    # --- Pieplot ---
    slice_sizes = [45, 30, 15, 10]
    slice_names = ('Cats', 'Dogs', 'Frogs', 'Others')
    slice_offsets = (0, 0.05, 0, 0)
    plt.pie(slice_sizes,
            explode=slice_offsets,
            labels=slice_names,
            autopct='%1.1f%%',
            shadow=True,
            startangle=90,
            colors=sns.color_palette('muted'))
    plt.axis('equal')
    C2_8_mystyle.printout('piePlot.png', title=' ')