def how_does_current_effect_depend_on_lane_position(lanes, f_13):
    """Fit, report and plot a straight line of ``f_13`` versus ``lanes``.

    The data are drawn as points, a degree-1 polynomial is fitted with
    ``np.polyfit``, and 10,000 pairs-bootstrap replicates of the regression
    are obtained from ``dcst.draw_bs_pairs_linreg``. The fitted slope and the
    2.5th/97.5th percentiles of the bootstrap slopes are printed, and the
    first 100 bootstrap lines are overlaid between x = 1 and x = 8.

    Parameters
    ----------
    lanes : array_like
        x-values of the regression (axis labelled 'lane').
    f_13 : array_like
        y-values of the regression (axis labelled 'frac. diff. (odd-even)').

    Returns
    -------
    None
        Output is the printed summary and the displayed figure.
    """
    # Only the slope is reported; the fitted intercept is not used further.
    fitted_slope, _fitted_intercept = np.polyfit(lanes, f_13, deg=1)

    # Raw data as unconnected markers
    plt.plot(lanes, f_13, marker='.', markersize=12, linestyle='none')
    plt.xlabel('lane')
    plt.ylabel('frac. diff. (odd-even)')

    # Pairs bootstrap of the regression parameters
    slope_reps, intercept_reps = dcst.draw_bs_pairs_linreg(
        lanes, f_13, size=10000)

    # 95% confidence interval of the slope
    ci_low, ci_high = np.percentile(slope_reps, [2.5, 97.5])

    print(""" slope: {0:.5f} per lane 95% conf int: [{1:.5f}, {2:.5f}] per lane""".format(
        fitted_slope, ci_low, ci_high))

    # Two end points are enough to draw each straight line
    x_ends = np.array([1, 8])

    n_lines = 100
    for k in range(n_lines):
        y_ends = slope_reps[k] * x_ends + intercept_reps[k]
        plt.plot(x_ends, y_ends, color='red', alpha=0.2, linewidth=0.5)

    plt.draw()
    plt.show()
def linear_regression_of_average_split_time(split_number, splits):
    """Regress the mean split time on split number, then plot and report it.

    ``splits`` is averaged along axis 0, a straight line is fitted to the
    averages versus ``split_number``, and a pairs bootstrap (10,000
    replicates) gives a 95% confidence interval for the slope, which is
    printed as the mean slowdown per split. The data and the best-fit line
    are shown in a figure.

    Parameters
    ----------
    split_number : array_like
        x-values of the regression (axis labelled 'split number').
    splits : array_like
        Split times; the mean over axis 0 supplies the y-values.

    Returns
    -------
    None
    """
    avg_split = np.mean(splits, axis=0)

    # Slope is the slowdown per split; the second coefficient is the intercept
    slowdown, intercept = np.polyfit(split_number, avg_split, deg=1)

    # Only the slope replicates are needed for the confidence interval
    slope_reps, _unused_intercepts = dcst.draw_bs_pairs_linreg(
        split_number, avg_split, size=10000)
    slowdown_ci = np.percentile(slope_reps, [2.5, 97.5])

    # Data points plus the fitted regression line
    plt.plot(split_number, avg_split, marker='.', linestyle='none')
    fitted_line = slowdown * split_number + intercept
    plt.plot(split_number, fitted_line, '-')

    plt.xlabel('split number')
    plt.ylabel('split time (s)')
    plt.show()

    summary = """ mean slowdown: {0:.3f} sec./split 95% conf int of mean slowdown: [{1:.3f}, {2:.3f}] sec./split"""
    print(summary.format(slowdown, *slowdown_ci))
def assessing_the_growth_rate(bac_area, t):
    """Print the fitted growth rate and its bootstrap confidence interval.

    A straight line is fitted to ``log(bac_area)`` versus ``t``; its slope is
    reported as the growth rate. The 95% confidence interval comes from the
    2.5th/97.5th percentiles of 10,000 pairs-bootstrap slope replicates.

    Parameters
    ----------
    bac_area : array_like
        Bacterial area values; their natural logarithm is regressed.
    t : array_like
        Time values used as the regression x-axis.

    Returns
    -------
    None
    """
    ln_area = np.log(bac_area)

    # Slope of the semilog curve is the growth rate; the intercept is unused
    rate, _ln_a0 = np.polyfit(t, ln_area, 1)

    # Pairs bootstrap of (slope, intercept); only the slopes are needed here
    rate_reps, _ln_a0_reps = dcst.draw_bs_pairs_linreg(t, ln_area, size=10000)

    rate_low, rate_high = np.percentile(rate_reps, [2.5, 97.5])

    report = """ Growth rate: {0:.4f} 1/hour 95% conf int: [{1:.4f}, {2:.4f}] 1/hour """
    print(report.format(rate, rate_low, rate_high))
def test_draw_bs_pairs_linreg():
    """Cross-check the ``draw_bs_pairs_linreg`` implementations.

    For data set lengths 10..19 and replicate counts 1, 10 and 100, noisy
    linear data (slope 1.5, intercept 3.0) are generated. NumPy's global RNG
    is seeded identically before calling the ``no_numba`` and ``original``
    implementations, whose slopes and intercepts must agree within ``atol``.

    The ``dcst`` implementation is also exercised. It is called without
    re-seeding, so its values cannot be compared with the others; only the
    shape of its output is asserted.
    """
    for n in range(10, 20):
        for size in [1, 10, 100]:
            x = np.random.random(n)
            y = 1.5 * x + 3.0 + (np.random.random(n) - 0.5) * 0.1

            # Same seed before each seeded call so both draw identical samples
            seed = np.random.randint(0, 100000)

            np.random.seed(seed)
            slope, intercept = no_numba.draw_bs_pairs_linreg(x, y, size=size)

            np.random.seed(seed)
            slope_correct, intercept_correct = original.draw_bs_pairs_linreg(
                x, y, size=size)

            # NOTE(review): presumably the numba-compiled path keeps its own
            # RNG state, which is why no seed is set here -- confirm.
            slope_test_numba, intercept_test_numba = dcst.draw_bs_pairs_linreg(
                x, y, size=size)

            assert np.allclose(slope, slope_correct, atol=atol, equal_nan=True)
            assert np.allclose(intercept, intercept_correct, atol=atol,
                               equal_nan=True)

            # These results used to be computed and silently discarded; at
            # least verify one replicate per requested draw is returned.
            assert np.shape(slope_test_numba) == (size,)
            assert np.shape(intercept_test_numba) == (size,)
def plotting_growth_curve(bac_area, t):
    """Show the growth data with bootstrap regression lines on a semilog plot.

    10,000 pairs-bootstrap replicates of the slope and intercept of
    ``log(bac_area)`` versus ``t`` are drawn. The data points and the first
    100 bootstrap lines (exponentiated back to the original scale) are plotted
    with a logarithmic y-axis; the lines span t = 0 to t = 14.

    Parameters
    ----------
    bac_area : array_like
        Bacterial area values (axis labelled 'area (sq. µm)').
    t : array_like
        Time values (axis labelled 'time (hr)').

    Returns
    -------
    None
    """
    ln_area = np.log(bac_area)
    rate_reps, ln_a0_reps = dcst.draw_bs_pairs_linreg(t, ln_area, size=10000)

    # Data points on a semilog-y plot
    plt.semilogy(t, bac_area, marker='.', linestyle='none')

    # End points of every bootstrap line
    t_ends = np.array([0, 14])

    # A line in log space becomes an exponential curve in linear space
    n_lines = 100
    for rate, ln_a0 in zip(rate_reps[:n_lines], ln_a0_reps[:n_lines]):
        area_ends = np.exp(rate * t_ends + ln_a0)
        plt.semilogy(t_ends, area_ends, linewidth=0.5, alpha=0.05, color='red')

    plt.xlabel('time (hr)')
    plt.ylabel('area (sq. µm)')
    plt.show()
def test_draw_bs_pairs_linreg_nan():
    """Check that unusable inputs make ``draw_bs_pairs_linreg`` raise.

    Each case passes a pair of arrays for which a ``RuntimeError`` is
    expected, and the error text is matched against the expected message:
    empty arrays, all-NaN arrays and arrays with no mutually non-NaN pair
    must report too few entries; an array containing ``inf`` must report
    non-finite entries.
    """
    too_few_msg = "Arrays must have at least 2 mutual non-NaN entries."
    not_finite_msg = "All entries in arrays must be finite."

    # (x, y, expected error message pattern)
    cases = [
        (np.array([]), np.array([]), too_few_msg),
        (np.array([np.nan]), np.array([np.nan]), too_few_msg),
        (np.array([np.nan, 1]), np.array([1, np.nan]), too_few_msg),
        (np.array([0, 1, 5]), np.array([1, np.inf, 3]), not_finite_msg),
    ]

    for x, y, expected in cases:
        with pytest.raises(RuntimeError) as excinfo:
            dcst.draw_bs_pairs_linreg(x, y, size=1)
        # Checked after the context exits, once the exception is captured
        excinfo.match(expected)
"""Linear regression of average split time We will assume that the swimmers slow down in a linear fashion over the course of the 800 m event. The slowdown per split is then the slope of the mean split time versus split number plot. Perform a linear regression to estimate the slowdown per split and compute a pairs bootstrap 95% confidence interval on the slowdown. Also show a plot of the best fit line. Note: We can compute error bars for the mean split times and use those in the regression analysis, but we will not take those into account here, as that is beyond the scope of this course.""" import numpy as np import matplotlib.pyplot as plt import dc_stat_think as dcst # Perform regression slowdown, split_3 = np.polyfit(split_number,mean_splits,1) # Compute pairs bootstrap bs_reps, _ = dcst.draw_bs_pairs_linreg(split_number,mean_splits,size=10000) # Compute confidence interval conf_int = np.percentile(bs_reps,[2.5,97.5]) # Plot the data with regressions line _ = plt.plot(split_number, mean_splits, marker='.', linestyle='none') _ = plt.plot(split_number, slowdown * split_number + split_3, '-') # Label axes and show plot _ = plt.xlabel('split number') _ = plt.ylabel('split time (s)') plt.show() # Print the slowdown per split print("""
# Compute and print p-value: p
# Fraction of bootstrap replicates at least as large as the observed
# difference of means.
p = np.sum(bs_reps >= diff_means_exp) / len(bs_reps)
print('p-value =', p)

#%%
# Compute logarithm of the bacterial area: log_bac_area
log_bac_area = np.log(bac_area)

# Compute the slope and intercept: growth_rate, log_a0
growth_rate, log_a0 = np.polyfit(t, log_bac_area, deg=1)

# Draw 10,000 pairs bootstrap replicates: growth_rate_bs_reps, log_a0_bs_reps
growth_rate_bs_reps, log_a0_bs_reps = \
    dcst.draw_bs_pairs_linreg(t, log_bac_area, size=10000)

# Compute confidence intervals: growth_rate_conf_int
growth_rate_conf_int = np.percentile(growth_rate_bs_reps, [2.5, 97.5])

# Print the result to the screen
# The growth rate is the slope of log(area) versus time, so it is a rate per
# unit time (1/hour), not an area per unit time.
print(""" Growth rate: {0:.4f} 1/hour 95% conf int: [{1:.4f}, {2:.4f}] 1/hour """.format(growth_rate, *growth_rate_conf_int))

#%%
# Plot data points in a semilog-y plot with axis labels
_ = plt.semilogy(t, bac_area, marker='.', linestyle='none')

# Generate x-values for the bootstrap lines: t_bs
import dc_stat_think as dcst

# Load the two data columns; transposing lets them unpack into two arrays.
bac_area, t = np.loadtxt('../datasets/park_bacterial_growth.csv',
                         delimiter=',', skiprows=3, comments='#').T

'''
INSTRUCTIONS

* Compute the logarithm of the bacterial area (bac_area) using np.log() and
  store the result in the variable log_bac_area.
* Compute the slope and intercept of the semilog growth curve using
  np.polyfit(). Store the slope in the variable growth_rate and the intercept
  in log_a0.
* Draw 10,000 pairs bootstrap replicates of the growth rate and log initial
  area using dcst.draw_bs_pairs_linreg(). Store the results in
  growth_rate_bs_reps and log_a0_bs_reps.
* Use np.percentile() to compute the 95% confidence interval of the growth
  rate (growth_rate_bs_reps).
* Print the growth rate and confidence interval to the screen. This has been
  done for you, so hit 'Submit Answer' to view the results!
'''

# Compute logarithm of the bacterial area: log_bac_area
log_bac_area = np.log(bac_area)

# Compute the slope and intercept: growth_rate, log_a0
growth_rate, log_a0 = np.polyfit(t, log_bac_area, 1)

# Draw 10,000 pairs bootstrap replicates: growth_rate_bs_reps, log_a0_bs_reps
growth_rate_bs_reps, log_a0_bs_reps = dcst.draw_bs_pairs_linreg(
    t, log_bac_area, size=10000)

# Compute confidence intervals: growth_rate_conf_int
growth_rate_conf_int = np.percentile(growth_rate_bs_reps, [2.5, 97.5])

# Print the result to the screen
# The growth rate is the slope of log(area) versus time, so it is a rate per
# unit time (1/hour), not an area per unit time.
print(""" Growth rate: {0:.4f} 1/hour 95% conf int: [{1:.4f}, {2:.4f}] 1/hour """.format(growth_rate, *growth_rate_conf_int))
''' INSTRUCTIONS * Compute the slope and intercept of the f_13 versus lanes line using np.polyfit(). * Use dcst.draw_bs_pairs_linreg() to get 10,000 bootstrap replicates of the slope and intercept, storing them respectively in bs_reps_slope and bs_reps_int. * Use the bootstrap replicates to compute a 95% confidence interval for the slope. * Print the slope and 95% confidence interval to the screen. This has been done for you. * Using np.array(), generate x-values to use for the plot of the bootstrap lines. x should go from 1 to 8. * The plot is already populated with the data. Write a for loop to add 100 bootstrap lines to the plot using the keyword arguments color='red', alpha=0.2, and linewidth=0.5. ''' # Compute the slope and intercept of the frac diff/lane curve slope, intercept = np.polyfit(lanes, f_13, 1) # Compute bootstrap replicates bs_reps_slope, bs_reps_int = dcst.draw_bs_pairs_linreg(lanes, f_13, size=10000) # Compute 95% confidence interval of slope conf_int = np.percentile(bs_reps_slope, [2.5, 97.5]) # Print slope and confidence interval print(""" slope: {0:.5f} per lane 95% conf int: [{1:.5f}, {2:.5f}] per lane""".format(slope, *conf_int)) # x-values for plotting regression lines x = np.array([1, 8]) # Plot 100 bootstrap replicate lines for i in range(100): _ = plt.plot(x,
def test_draw_bs_pairs_linreg_edge():
    """Check the degenerate case where both inputs are arrays of ones.

    With ten identical (1, 1) points, all 10 bootstrap replicates of both the
    slope and the intercept are expected to be NaN.
    """
    all_ones_x = np.ones(10)
    all_ones_y = np.ones(10)

    slope_reps, intercept_reps = dcst.draw_bs_pairs_linreg(
        all_ones_x, all_ones_y, size=10)

    # Slopes are checked first, then intercepts
    for reps in (slope_reps, intercept_reps):
        assert np.isnan(reps).all()
# Fraction of bootstrap replicates at least as large as the observed
# difference of means.
p = np.sum(bs_reps >= diff_means_exp) / len(bs_reps)
print('p-value =', p)

# -------------------------------------------------------
# Assessing the growth rate
# -------------------------------------------------------
# Compute logarithm of the bacterial area: log_bac_area
log_bac_area = np.log(bac_area)

# Compute the slope and intercept: growth_rate, log_a0
growth_rate, log_a0 = np.polyfit(t, log_bac_area, 1)

# Draw 10,000 pairs bootstrap replicates: growth_rate_bs_reps, log_a0_bs_reps
growth_rate_bs_reps, log_a0_bs_reps = \
    dcst.draw_bs_pairs_linreg(t, log_bac_area, size=10000)

# Compute confidence intervals: growth_rate_conf_int
growth_rate_conf_int = np.percentile(growth_rate_bs_reps, [2.5, 97.5])

# Print the result to the screen
# The growth rate is the slope of log(area) versus time, so it is a rate per
# unit time (1/hour), not an area per unit time.
print(""" Growth rate: {0:.4f} 1/hour 95% conf int: [{1:.4f}, {2:.4f}] 1/hour """.format(growth_rate, *growth_rate_conf_int))

# -------------------------------------------------------
# Plotting the growth curve
# -------------------------------------------------------
# Plot data points in a semilog-y plot with axis labels