def show_poisson_views():
    """Show different views of a Poisson distribution.

    Draws three stacked panels for a Poisson distribution with mu=10,
    evaluated at k = 0..24: the probability mass function (PMF), the
    cumulative distribution function (CDF), and the percent point
    function (PPF, evaluated on 100 probabilities between 0 and 1).
    The figure is displayed with plt.show(); nothing is returned.
    """

    fig, ax = plt.subplots(3, 1)

    k = np.arange(25)
    # Deliberately not called "pd", so that the pandas alias used elsewhere
    # in this file is not shadowed.
    poisson_dist = stats.poisson(10)
    C2_8_mystyle.set(12)

    # Upper panel: PMF; tick labels are hidden on this panel
    ax[0].plot(k, poisson_dist.pmf(k), 'x-')
    ax[0].set_title('Poisson distribution')
    ax[0].set_xticklabels([])
    ax[0].set_ylabel('PMF (X)')

    # Middle panel: CDF
    ax[1].plot(k, poisson_dist.cdf(k))
    ax[1].set_xlabel('X')
    ax[1].set_ylabel('CDF (X)')

    # Lower panel: PPF, plotted against the probabilities
    y = np.linspace(0, 1, 100)
    ax[2].plot(y, poisson_dist.ppf(y))
    ax[2].set_xlabel('X')
    ax[2].set_ylabel('PPF (X)')

    plt.tight_layout()
    plt.show()
def KS_principle(inData):
    '''Show the principle of the Kolmogorov-Smirnov test.

    Plots the CDF of the standard normal distribution (dashed) together
    with the empirical CDF of the sample (solid), and marks the vertical
    distance between the two curves at x = 2 with a double-headed arrow.
    The figure is handed to C2_8_mystyle.printout_plain under the name
    'KS_Example.png'.

    Parameters
    ----------
    inData : array-like
        Sample data. The histogram limits below are fixed to [0, 10], so
        the data are expected to lie approximately in that range.
    '''

    # CDF of normally distributed data
    nd = stats.norm()
    nd_x = np.linspace(-4, 4, 101)
    nd_y = nd.cdf(nd_x)

    # Empirical CDF of the sample data, which range from approximately 0 to 10
    numPts = 50
    lowerLim = 0
    upperLim = 10
    ecdf_x = np.linspace(lowerLim, upperLim, numPts)
    # Use the function argument here; the original referenced an undefined
    # name "data" instead of "inData".
    ecdf_y = stats.cumfreq(inData, numPts, (lowerLim, upperLim))[0]/len(inData)

    # Add zero-point by hand
    ecdf_x = np.hstack((0., ecdf_x))
    ecdf_y = np.hstack((0., ecdf_y))

    # Plot the data
    sns.set_style('ticks')
    sns.set_context('poster')
    C2_8_mystyle.set(36)

    # Successive plot commands draw into the same axes by default, so no
    # "plt.hold(True)" is needed (that function no longer exists in matplotlib).
    plt.plot(nd_x, nd_y, 'k--')
    plt.plot(ecdf_x, ecdf_y, color='k')
    plt.xlabel('X')
    plt.ylabel('Cumulative Probability')

    # For the arrow, find the first sample point at or beyond x = 2 on each curve
    ecdf_startIndex = np.min(np.where(ecdf_x >= 2))
    arrowStart = np.array([ecdf_x[ecdf_startIndex], ecdf_y[ecdf_startIndex]])

    nd_startIndex = np.min(np.where(nd_x >= 2))
    arrowEnd = np.array([nd_x[nd_startIndex], nd_y[nd_startIndex]])
    arrowDelta = arrowEnd - arrowStart

    # Two opposing vertical arrows give a double-headed arrow
    plt.arrow(arrowStart[0], arrowStart[1], 0, arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.02, head_width=0.2, color='k')

    plt.arrow(arrowStart[0], arrowStart[1]+arrowDelta[1], 0, -arrowDelta[1],
              width=0.05, length_includes_head=True, head_length=0.02, head_width=0.2, color='k')

    outFile = 'KS_Example.png'
    C2_8_mystyle.printout_plain(outFile)
def generate_probplot():
    '''Generate a prob-plot for a chi2-distribution of sample data.

    Draws two panels side by side: the PDF of a chi2-distribution with
    3 degrees of freedom, and the probability plot of 100 random samples
    drawn from it (random seed fixed at 12345). The figure is handed to
    C2_8_mystyle.printout_plain under the name 'chi2pp.png'.

    Returns
    -------
    data : ndarray
        The 100 random samples shown in the probability plot.
    '''
    # Define the skewed distribution
    chi2 = stats.chi2(3)

    # Generate the data
    x = np.linspace(0, 10, 100)
    y = chi2.pdf(x)
    np.random.seed(12345)
    numData = 100
    data = chi2.rvs(numData)

    # Arrange subplots
    sns.set_context('paper')
    sns.set_style('white')
    C2_8_mystyle.set(11)
    fig, axs = plt.subplots(1, 2)

    # Plot distribution
    axs[0].plot(x, y)
    axs[0].set_xlabel('X')
    axs[0].set_ylabel('PDF(X)')
    axs[0].set_title('chi2(x), k=3')
    sns.set_style('white')

    # Make the panel square: aspect = (x-range)/(y-range)
    x0, x1 = axs[0].get_xlim()
    y0, y1 = axs[0].get_ylim()
    axs[0].set_aspect((x1-x0)/(y1-y0))

    # Plot probplot
    # Select the right-hand panel as the current axes. plt.sca() is used
    # because plt.axes() no longer accepts an existing Axes instance.
    plt.sca(axs[1])
    stats.probplot(data, plot=plt)

    # The limits are read before the zero-lines are added, so the aspect
    # ratio is based on the extent of the probplot itself.
    x0, x1 = axs[1].get_xlim()
    y0, y1 = axs[1].get_ylim()
    axs[1].axhline(0, lw=0.5, ls='--')
    axs[1].axvline(0, lw=0.5, ls='--')
    axs[1].set_aspect((x1-x0)/(y1-y0))

    C2_8_mystyle.printout_plain('chi2pp.png')

    return data
    '''
def main():
    '''Demonstrate the generation of different statistical standard plots.

    Walks through a series of example figures: scatter plot, histograms,
    cumulative frequencies, KDE plot, boxplots, errorbars, violin plot,
    bar plot, grouped boxplot, a bivariate scatter plot and a pie plot.
    Most figures are handed to C2_8_mystyle.printout together with an
    output file name; the horizontal boxplot is displayed with plt.show().
    '''

    # Univariate data -------------------------
    # Generate data that are normally distributed
    x = np.random.randn(500)

    # Set the fonts the way I like them
    sns.set_context('poster')
    sns.set_style('ticks')
    C2_8_mystyle.set(fs=32)

    # Scatter plot
    plt.scatter(np.arange(len(x)), x)
    plt.xlim([0, len(x)])

    # Save and show the data, in a systematic format
    C2_8_mystyle.printout('scatterPlot.png', xlabel='x', ylabel='y', title='Scatter')

    # Histogram
    plt.hist(x)
    C2_8_mystyle.printout('histogram_plain.png', xlabel='Data Values', ylabel='Frequency', title='Histogram, default settings')

    plt.hist(x, 25)
    C2_8_mystyle.printout('histogram.png', xlabel='Data Values', ylabel='Frequency', title='Histogram, 25 bins')

    # Cumulative probability density
    numbins = 20
    plt.plot(stats.cumfreq(x, numbins)[0])
    C2_8_mystyle.printout('CumulativeFrequencyFunction.png', xlabel='Data Values', ylabel='CumFreq', title='Cumulative Frequency')

    # KDE-plot
    sns.kdeplot(x)
    C2_8_mystyle.printout('kde.png', xlabel='Data Values', ylabel='Density',
            title='KDE_plot')

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(x, sym='*')
    C2_8_mystyle.printout('boxplot.png', xlabel='Values', title='Boxplot')

    plt.boxplot(x, sym='*', vert=False)
    plt.title('Boxplot, horizontal')
    plt.xlabel('Values')
    plt.show()

    # Errorbars
    x = np.arange(5)
    y = x**2
    errorBar = x/2
    plt.errorbar(x, y, yerr=errorBar, fmt='o', capsize=5, capthick=3)
    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    C2_8_mystyle.printout('Errorbars.png', xlabel='Data Values', ylabel='Measurements', title='Errorbars')

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))

    nd2 = stats.norm(loc = 3, scale = 1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot.
    # The DataFrame is passed explicitly as "data", so that it is treated
    # as a wide-form dataset (one violin per column).
    df = pd.DataFrame({'Girls':data, 'Boys':data2})
    sns.violinplot(data=df)

    C2_8_mystyle.printout('violinplot.png', title='Violinplot')

    # Barplot
    df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
    df.plot(kind='bar', grid=False)
    C2_8_mystyle.printout('barplot.png', title='Barplot')

    # Grouped Boxplot (one box per column of the wide-form DataFrame)
    sns.set_style('whitegrid')
    sns.boxplot(data=df)
    C2_8_mystyle.set(fs=28)
    C2_8_mystyle.printout('groupedBoxplot.png', title='sns.boxplot')

    # Bivariate Plots
    # The marker sizes have to come from the plotted frame (df2, 50 rows),
    # not from the 10-row frame "df" of the bar plot above.
    df2 = pd.DataFrame(np.random.rand(50, 4), columns=['a', 'b', 'c', 'd'])
    df2.plot(kind='scatter', x='a', y='b', s=df2['c']*300)
    C2_8_mystyle.printout('bivariate.png')

    # Pieplot
    series = pd.Series(3 * np.random.rand(4), index=['a', 'b', 'c', 'd'], name='series')
    # Remember the current palette, so it can be restored after the pie plot
    oldPalette = sns.color_palette()
    sns.set_palette("husl")
    series.plot(kind='pie', figsize=(6, 6))
    C2_8_mystyle.printout('piePlot.png', title='pie-plot')
    sns.set_palette(oldPalette)
"miR-137" is a short non-coding RNA molecule that functions to regulate
the expression levels of other genes.
'''
# author: Thomas Haslwanter, date: Jun-2015

# Import standard packages
import matplotlib.pyplot as plt
import C2_8_mystyle as mystyle

# additional packages
from lifelines.datasets import load_waltons
from lifelines import KaplanMeierFitter
from lifelines.statistics import logrank_test

# Set my favorite font (default settings of the project-local style module)
mystyle.set()

# Load and show the data
df = load_waltons() # returns a Pandas DataFrame

# Print the first rows of the data set. The string literal below is not
# executed; it records the output of this print statement.
print(df.head())
'''
    T  E    group
0   6  1  miR-137
1  13  1  miR-137
2  13  1  miR-137
3  13  1  miR-137
4  19  1  miR-137
'''

# Column 'T' of the data set
# NOTE(review): presumably the durations for the survival analysis - confirm
# against the lifelines documentation of load_waltons.
T = df['T']
# Example no. 6
# 0
# author: Thomas Haslwanter, date: July-2015


# Import standard packages
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns
import C2_8_mystyle

# Log-normal distribution (shape parameter 2), evaluated on logarithmically
# spaced x-values between about 1e-9 and 10
x = np.logspace(-9, 1, 1001) + 1e-9
lnd = stats.lognorm(2)
y = lnd.pdf(x)

# Graphics settings, and two panels that share the y-axis
sns.set_style('ticks')
C2_8_mystyle.set(18)
fig, axs = plt.subplots(1, 2, sharey=True)
sns.set_context('poster')

# Left panel: PDF on a linear x-axis
axs[0].plot(x, y)
axs[0].set_xlim(-0.1, 8)
axs[0].set(xlabel='x', ylabel='pdf(x)')

# Right panel: the same PDF, plotted against log(x)
axs[1].plot(np.log(x), y)
axs[1].set_xlim(-12, 5)
axs[1].set(xlabel='log(x)')

# Hand the figure to the style module for output
outFile = 'logNormal.png'
C2_8_mystyle.printout_plain(outFile)
# Example no. 7
# 0
# additional packages
import C2_8_mystyle

# Standard normal distribution: PDF and CDF on [-3, 3]
nd = stats.norm()

x = np.linspace(-3, 3, 100)
yp = nd.pdf(x)
y = nd.cdf(x)

# Part of the PDF from -3 to 1, used for the shaded area
x1 = np.linspace(-3, 1)
y1 = nd.pdf(x1)

# Graphics settings
sns.set_context('paper')
sns.set_style('white')
C2_8_mystyle.set(12)

figs, axs = plt.subplots(1, 2)

# Left panel: PDF, with the area up to x=1 shaded and labelled
axs[0].plot(x, yp, 'k')
axs[0].fill_between(x1, y1, facecolor='#CCCCCC')
axs[0].text(0, 0.1, 'CDF(x)',
            family='cursive', fontsize=14, style='italic',
            horizontalalignment='center')
axs[0].set(xlabel='x', ylabel='PDF(x)')
sns.despine()

# Right panel: CDF, with a dashed vertical line at x=0
axs[1].plot(x, y, '#999999', lw=3)
axs[1].set(xlabel='x', ylabel='CDF(x)')
plt.vlines(0, 0, 1, linestyles='--')
sns.despine()
        
    ax.xaxis.set_ticks([50,150,250])
    ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
    ax.yaxis.set_ticks([])
    ax.set_title(title)
    
    grandMean = np.mean(groupMean)
    ax.axhline(grandMean, color='#999999')
    ax.plot([80, 220], [groupMean[1], groupMean[1]], '#999999')
    ax.plot([80, 120], [groupMean[1]+0.2, groupMean[1]+0.2], '#999999')
    ax.annotate('', xy=(210, grandMean), xytext=(210,groupMean[1]), 
            arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black'))
    ax.annotate('', xy=(90, groupMean[1]), xytext=(90,groupMean[1]+0.2), 
            arrowprops=dict(arrowstyle='<->, head_width=0.1', facecolor='black'))
    ax.text(210, (grandMean + groupMean[1])/2., '$SS_{Treatment}$', fontsize=36)
    ax.text(90, groupMean[1]+0.1, '$SS_{Error}$', ha='right', fontsize=36)

if __name__ == '__main__':
    # Parameters of the three data groups
    centers = [5, 5.3, 4.7]
    std = 0.1
    numData = 100

    # Reproducible random numbers, and font settings
    np.random.seed(123)
    C2_8_mystyle.set(30)

    # A single axis holds the annotated plot
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    show_fig(std, ax, 'Sum-Squares')

    # Save and show
    C2_8_mystyle.printout_plain('anova_annotated.png')
# Example no. 9
# 0
def simplePlots():
    '''Demonstrate the generation of different statistical standard plots.

    Walks through a series of example figures: scatter plot, histograms,
    cumulative frequencies, KDE plot, boxplots, errorbars, violin plot,
    bar plot, a bivariate scatter plot, grouped boxplot and a pie plot.
    Most figures are handed to C2_8_mystyle.printout (or printout_plain)
    together with an output file name; the horizontal boxplot is
    displayed with plt.show().
    '''

    # Univariate data -------------------------

    # Make sure that always the same random numbers are generated
    np.random.seed(1234)

    # Generate data that are normally distributed
    x = np.random.randn(500)

    # Other graphics settings
    sns.set(context='poster', style='ticks', palette=sns.color_palette('muted'))

    # Set the fonts the way I like them
    C2_8_mystyle.set(fs=32)

    # Scatter plot
    plt.scatter(np.arange(len(x)), x)
    plt.xlim([0, len(x)])

    # Save and show the data, in a systematic format
    C2_8_mystyle.printout('scatterPlot.png', xlabel='Datapoints', ylabel='Values', title='Scatter')

    # Histogram
    plt.hist(x)
    C2_8_mystyle.printout('histogram_plain.png', xlabel='Data Values',
                          ylabel='Frequency', title='Histogram, default settings')

    plt.hist(x, 25)
    C2_8_mystyle.printout('histogram.png', xlabel='Data Values', ylabel='Frequency',
                          title='Histogram, 25 bins')

    # Cumulative probability density
    numbins = 20
    plt.plot(stats.cumfreq(x, numbins)[0])
    C2_8_mystyle.printout('CumulativeFrequencyFunction.png', xlabel='Data Values',
                          ylabel='CumFreq', title='Cumulative Frequency')

    # KDE-plot
    sns.kdeplot(x)
    C2_8_mystyle.printout('kde.png', xlabel='Data Values', ylabel='Density',
            title='KDE_plot')

    # Boxplot
    # The box consists of the first, second (middle) and third quartile
    plt.boxplot(x, sym='*')
    C2_8_mystyle.printout('boxplot.png', xlabel='Values', title='Boxplot')

    plt.boxplot(x, sym='*', vert=False)
    plt.title('Boxplot, horizontal')
    plt.xlabel('Values')
    plt.show()

    # Errorbars
    x = np.arange(5)
    y = x**2
    errorBar = x/2
    plt.errorbar(x, y, yerr=errorBar, fmt='o', capsize=5, capthick=3)
    plt.xlim([-0.2, 4.2])
    plt.ylim([-0.2, 19])
    C2_8_mystyle.printout('Errorbars.png', xlabel='Data Values', ylabel='Measurements', title='Errorbars')

    # Violinplot
    nd = stats.norm
    data = nd.rvs(size=(100))

    nd2 = stats.norm(loc = 3, scale = 1.5)
    data2 = nd2.rvs(size=(100))

    # Use pandas and the seaborn package for the violin plot.
    # The DataFrame is passed explicitly as "data", so that it is treated
    # as a wide-form dataset (one violin per column).
    df = pd.DataFrame({'Girls':data, 'Boys':data2})
    sns.violinplot(data=df)

    C2_8_mystyle.printout('violinplot.png', title='Violinplot')

    # Barplot
    # The font-size is set such that the legend does not overlap with the data
    np.random.seed(1234)
    C2_8_mystyle.set(20)

    df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])
    df.plot(kind='bar', grid=False, color=sns.color_palette('muted'))

    C2_8_mystyle.printout_plain('barplot.png')
    C2_8_mystyle.set(28)

    # Bivariate Plots
    # Column 'c' of the plotted frame determines the marker sizes
    df2 = pd.DataFrame(np.random.rand(50, 3), columns=['a', 'b', 'c'])
    df2.plot(kind='scatter', x='a', y='b', s=df2['c']*500)
    plt.axhline(0, ls='--', color='#999999')
    plt.axvline(0, ls='--', color='#999999')
    C2_8_mystyle.printout('bivariate.png')

    # Grouped Boxplot (one box per column of the bar-plot DataFrame)
    sns.set_style('whitegrid')
    sns.boxplot(data=df)
    C2_8_mystyle.set(fs=28)
    C2_8_mystyle.printout('groupedBoxplot.png', title='sns.boxplot')

    sns.set_style('ticks')

    # Pieplot
    txtLabels = 'Cats', 'Dogs', 'Frogs', 'Others'
    fractions = [45, 30, 15, 10]
    # Only the second wedge ('Dogs') is pulled out of the pie
    offsets =(0, 0.05, 0, 0)

    plt.pie(fractions, explode=offsets, labels=txtLabels,
            autopct='%1.1f%%', shadow=True, startangle=90,
            colors=sns.color_palette('muted') )
    plt.axis('equal')
    C2_8_mystyle.printout('piePlot.png', title=' ')
# Example no. 10
# 0
import C2_8_mystyle

def show_fig(std, ax, title):
    '''Plot three groups of normally distributed samples into a given axis.

    The group means are read from the module-level list "centers", and the
    number of samples per group from the module-level "numData". The groups
    are drawn next to each other along the x-axis.

    Parameters
    ----------
    std : standard deviation used for all three groups
    ax : axis into which the data are plotted
    title : title of the axis
    '''

    for group_idx in range(3):
        # Each group starts where the previous one ended
        start = group_idx * numData
        samples = stats.norm(centers[group_idx], std).rvs(numData)
        ax.plot(start + np.arange(numData), samples, '.', ms=10)

    # Tick marks at fixed x-positions, one labelled tick per group
    ax.xaxis.set_ticks([50, 150, 250])
    ax.set_xticklabels(['Group1', 'Group2', 'Group3'])
    ax.set_title(title)
    sns.despine()

if __name__ == '__main__':

    # Parameters of the data groups: three group means, two different SDs
    centers = [5, 5.3, 4.7]
    stds = [0.1, 2]
    numData = 100

    # Set up the figure
    sns.set_context('paper')
    sns.set_style('whitegrid')
    C2_8_mystyle.set(14)

    # Create 2 plots of 3 different, normally distributed data groups, one plot per SD
    fig, axs = plt.subplots(1, 2)
    show_fig(stds[0], axs[0], 'SD=0.1')
    show_fig(stds[1], axs[1], 'SD=2.0')

    C2_8_mystyle.printout_plain('anova_oneway.png')