Python draw_bs_pairs_linreg示例，dc_stat_think.draw_bs_pairs_linreg Python示例

示例#1

0

显示文件

def how_does_current_effect_depend_on_lane_position(lanes, f_13):
    """Regress ``f_13`` on ``lanes`` and overlay pairs-bootstrap lines.

    Scatter-plots ``f_13`` against ``lanes``, fits a first-degree
    polynomial, draws 10,000 pairs-bootstrap replicates of the slope and
    intercept, prints the fitted slope with the 2.5th-97.5th percentile
    interval of the slope replicates, and finally overlays the first 100
    replicate lines (evaluated at x = 1 and x = 8) before showing the figure.

    Parameters
    ----------
    lanes : array_like
        x-values of the regression (plotted on the 'lane' axis).
    f_13 : array_like
        y-values of the regression (plotted on the
        'frac. diff. (odd-even)' axis).
    """
    # np.polyfit returns the highest-degree coefficient first, i.e.
    # (slope, intercept); only the slope is reported below.
    fit_slope, _fit_intercept = np.polyfit(lanes, f_13, deg=1)

    # Raw data as unconnected points.
    plt.plot(lanes, f_13, marker='.', markersize=12, linestyle='none')
    plt.xlabel('lane')
    plt.ylabel('frac. diff. (odd-even)')

    # Pairs bootstrap of the linear regression.
    bs_reps_slope, bs_reps_int = dcst.draw_bs_pairs_linreg(
        lanes, f_13, size=10000)

    # Central 95% interval of the bootstrap slopes.
    slope_low, slope_high = np.percentile(bs_reps_slope, [2.5, 97.5])

    report = """
    slope: {0:.5f} per lane
    95% conf int: [{1:.5f}, {2:.5f}] per lane"""
    print(report.format(fit_slope, slope_low, slope_high))

    # A straight line only needs its two end points.
    x_ends = np.array([1, 8])

    # Overlay the first 100 bootstrap regression lines.
    for rep_slope, rep_int in zip(bs_reps_slope[:100], bs_reps_int[:100]):
        plt.plot(x_ends,
                 rep_slope * x_ends + rep_int,
                 color='red',
                 alpha=0.2,
                 linewidth=0.5)

    # Refresh and display the figure.
    plt.draw()
    plt.show()

示例#2

0

显示文件

文件： 23_performance_decline_over_long_distance.py 项目： printfCRLF/pp

def linear_regression_of_average_split_time(split_number, splits):
    """Fit, plot and report the linear trend of the mean split time.

    ``splits`` is averaged over axis 0 and regressed against
    ``split_number`` with a first-degree polynomial. A pairs bootstrap
    (10,000 replicates) provides the 2.5th-97.5th percentile interval of
    the slope. The data and the fitted line are plotted and shown, then the
    slope and its interval are printed.

    Parameters
    ----------
    split_number : array_like
        x-values of the regression.
    splits : array_like
        Split times; the mean is taken along axis 0.
    """
    avg_splits = np.mean(splits, axis=0)

    # Least-squares line: slope first, intercept second.
    slope, offset = np.polyfit(split_number, avg_splits, deg=1)

    # Only the slope replicates are needed for the interval.
    slope_reps, _int_reps = dcst.draw_bs_pairs_linreg(
        split_number, avg_splits, size=10000)
    ci_low, ci_high = np.percentile(slope_reps, [2.5, 97.5])

    # Data as points, regression line on top.
    plt.plot(split_number, avg_splits, marker='.', linestyle='none')
    plt.plot(split_number, slope * split_number + offset, '-')
    plt.xlabel('split number')
    plt.ylabel('split time (s)')
    plt.show()

    report = """
    mean slowdown: {0:.3f} sec./split
    95% conf int of mean slowdown: [{1:.3f}, {2:.3f}] sec./split"""
    print(report.format(slope, ci_low, ci_high))

示例#3

0

显示文件

文件： 12_linear_regression_and_pairs_bootstrap.py 项目： printfCRLF/pp

def assessing_the_growth_rate(bac_area, t):
    """Print the slope of log(``bac_area``) versus ``t`` with a 95% interval.

    The natural log of ``bac_area`` is regressed on ``t`` with a
    first-degree polynomial; the slope is reported as the growth rate. The
    interval is the 2.5th-97.5th percentile range of 10,000 pairs-bootstrap
    slope replicates.

    Parameters
    ----------
    bac_area : array_like
        Values whose logarithm is regressed.
    t : array_like
        x-values of the regression.
    """
    log_area = np.log(bac_area)

    # np.polyfit yields (slope, intercept) for a degree-1 fit.
    rate, _log_a0 = np.polyfit(t, log_area, deg=1)

    # Pairs bootstrap; the intercept replicates are not used here.
    rate_reps, _log_a0_reps = dcst.draw_bs_pairs_linreg(
        t, log_area, size=10000)
    ci_low, ci_high = np.percentile(rate_reps, [2.5, 97.5])

    report = """
    Growth rate: {0:.4f} 1/hour
    95% conf int: [{1:.4f}, {2:.4f}] 1/hour
    """
    print(report.format(rate, ci_low, ci_high))

示例#4

0

显示文件

def test_draw_bs_pairs_linreg():
    """Compare no_numba.draw_bs_pairs_linreg with original.draw_bs_pairs_linreg.

    For sample sizes n = 10..19 and replicate counts 1, 10 and 100, noisy
    linear data (y = 1.5 x + 3 plus uniform noise in [-0.05, 0.05)) is
    generated, both implementations are run after seeding NumPy's global
    RNG with the same seed, and their slopes and intercepts must agree
    within ``atol`` (NaNs compare equal).
    """
    for n in range(10, 20):
        for size in [1, 10, 100]:
            # Synthetic data: exact line plus small zero-centred noise.
            x = np.random.random(n)
            y = 1.5 * x + 3.0 + (np.random.random(n) - 0.5) * 0.1

            # One seed reused for both implementations so they draw the
            # same bootstrap indices.
            seed = np.random.randint(0, 100000)

            np.random.seed(seed)
            slope, intercept = no_numba.draw_bs_pairs_linreg(x, y, size=size)
            np.random.seed(seed)
            slope_correct, intercept_correct = original.draw_bs_pairs_linreg(
                x, y, size=size)
            # NOTE(review): this call is not re-seeded and its results are
            # never asserted -- presumably a smoke test that the dcst
            # version runs without raising; confirm the intent.
            slope_test_numba, intercept_test_numba = dcst.draw_bs_pairs_linreg(
                x, y, size=size)
            # `atol` is not defined in this block; it must come from the
            # enclosing module.
            assert np.allclose(slope, slope_correct, atol=atol, equal_nan=True)
            assert np.allclose(intercept,
                               intercept_correct,
                               atol=atol,
                               equal_nan=True)

示例#5

0

显示文件

文件： 12_linear_regression_and_pairs_bootstrap.py 项目： printfCRLF/pp

def plotting_growth_curve(bac_area, t):
    """Show ``bac_area`` versus ``t`` on a semilog-y plot with bootstrap lines.

    The data are drawn as unconnected points. The first 100 of 10,000
    pairs-bootstrap regressions of log(``bac_area``) on ``t`` are
    exponentiated and overlaid, each evaluated at t = 0 and t = 14.

    Parameters
    ----------
    bac_area : array_like
        Values plotted on the logarithmic y-axis.
    t : array_like
        Values plotted on the x-axis.
    """
    # Bootstrap the straight-line fit in log space.
    slope_reps, log_a0_reps = dcst.draw_bs_pairs_linreg(
        t, np.log(bac_area), size=10000)

    # Data points on a semilog-y plot with axis labels added below.
    plt.semilogy(t, bac_area, marker='.', linestyle='none')

    # Two end points are enough to draw each bootstrap line.
    t_ends = np.array([0, 14])

    # First 100 bootstrap lines, mapped back from log space.
    for rep_slope, rep_log_a0 in zip(slope_reps[:100], log_a0_reps[:100]):
        area_ends = np.exp(rep_slope * t_ends + rep_log_a0)
        plt.semilogy(t_ends, area_ends, linewidth=0.5, alpha=0.05, color='red')

    plt.xlabel('time (hr)')
    plt.ylabel('area (sq. µm)')
    plt.show()

示例#6

0

显示文件

文件： test_dc_stat_think.py 项目： Schots/dc_stat_think

def test_draw_bs_pairs_linreg_nan():
    """dcst.draw_bs_pairs_linreg must raise RuntimeError on unusable input.

    Covers empty arrays, all-NaN arrays, arrays with no shared non-NaN
    position, and an array containing an infinity; each case is checked
    against the expected error message.
    """
    too_few = "Arrays must have at least 2 mutual non-NaN entries."
    not_finite = "All entries in arrays must be finite."

    # (x, y, expected error-message pattern)
    cases = (
        (np.array([]), np.array([]), too_few),
        (np.array([np.nan]), np.array([np.nan]), too_few),
        (np.array([np.nan, 1]), np.array([1, np.nan]), too_few),
        (np.array([0, 1, 5]), np.array([1, np.inf, 3]), not_finite),
    )

    for x, y, message in cases:
        with pytest.raises(RuntimeError) as excinfo:
            dcst.draw_bs_pairs_linreg(x, y, size=1)
        excinfo.match(message)

示例#7

0

显示文件

文件： linear_reg_slowdown.py 项目： hackassin/Neural-Networks-DataCamp

"""Linear regression of average split time

We will assume that the swimmers slow down in a linear fashion over the course of the 800 m event.
The slowdown per split is then the slope of the mean split time versus split number plot.
Perform a linear regression to estimate the slowdown per split and compute a pairs bootstrap 95% confidence interval
on the slowdown. Also show a plot of the best fit line.
Note: We can compute error bars for the mean split times and use those in the regression analysis,
but we will not take those into account here, as that is beyond the scope of this course."""
import numpy as np
import matplotlib.pyplot as plt
import dc_stat_think as dcst
# Straight-line fit of mean split time against split number;
# np.polyfit returns the slope first, then the intercept.
slowdown, split_3 = np.polyfit(split_number, mean_splits, deg=1)

# Pairs bootstrap of the regression; only the slope replicates are kept.
bs_reps, _ = dcst.draw_bs_pairs_linreg(
    split_number,
    mean_splits,
    size=10000,
)

# 95% interval: 2.5th and 97.5th percentiles of the slope replicates.
conf_int = np.percentile(bs_reps, [2.5, 97.5])

# Data as unconnected points, fitted line drawn on top.
_ = plt.plot(
    split_number,
    mean_splits,
    marker='.',
    linestyle='none',
)
_ = plt.plot(
    split_number,
    slowdown * split_number + split_3,
    '-',
)

# Axis labels, then display the figure.
_ = plt.xlabel('split number')
_ = plt.ylabel('split time (s)')
plt.show()

# Print the slowdown per split
print("""

示例#8

0

显示文件

# Compute and print p-value: p
# Fraction of replicates at least as large as diff_means_exp.
p = np.sum(bs_reps >= diff_means_exp) / len(bs_reps)
print('p-value =', p)

#%%

# Compute logarithm of the bacterial area: log_bac_area
log_bac_area = np.log(bac_area)

# Compute the slope and intercept: growth_rate, log_a0
# (np.polyfit returns the highest-degree coefficient first.)
growth_rate, log_a0 = np.polyfit(t, log_bac_area, deg=1)

# Draw 10,000 pairs bootstrap replicates: growth_rate_bs_reps, log_a0_bs_reps
growth_rate_bs_reps, log_a0_bs_reps = \
            dcst.draw_bs_pairs_linreg(t, log_bac_area, size=10000)

# Compute confidence intervals: growth_rate_conf_int
growth_rate_conf_int = np.percentile(growth_rate_bs_reps, [2.5, 97.5])

# Print the result to the screen.
# The growth rate is the slope of log(area) versus time, so its unit is
# 1/hour (the logarithm is dimensionless), not an area per hour.
print("""
Growth rate: {0:.4f} 1/hour
95% conf int: [{1:.4f}, {2:.4f}] 1/hour
""".format(growth_rate, *growth_rate_conf_int))

#%%
# Plot data points in a semilog-y plot with axis labels
_ = plt.semilogy(t, bac_area, marker='.', linestyle='none')

# Generate x-values for the bootstrap lines: t_bs

示例#9

0

显示文件

import dc_stat_think as dcst

# Load the two data columns; transposing lets them unpack into
# bac_area and t. The first three rows and '#' comments are skipped.
bac_area, t = np.loadtxt('../datasets/park_bacterial_growth.csv', delimiter=',', skiprows=3, comments='#').T

'''
INSTRUCTIONS

*   Compute the logarithm of the bacterial area (bac_area) using np.log() and store the result in the variable log_bac_area.
*   Compute the slope and intercept of the semilog growth curve using np.polyfit(). Store the slope in the variable growth_rate and the intercept in log_a0.
*   Draw 10,000 pairs bootstrap replicates of the growth rate and log initial area using dcst.draw_bs_pairs_linreg(). Store the results in growth_rate_bs_reps and log_a0_bs_reps.
*   Use np.percentile() to compute the 95% confidence interval of the growth rate (growth_rate_bs_reps).
*   Print the growth rate and confidence interval to the screen. This has been done for you, so hit 'Submit Answer' to view the results!
'''

# Compute logarithm of the bacterial area: log_bac_area
log_bac_area = np.log(bac_area)

# Compute the slope and intercept: growth_rate, log_a0
# (np.polyfit returns the highest-degree coefficient first.)
growth_rate, log_a0 = np.polyfit(t, log_bac_area, 1)

# Draw 10,000 pairs bootstrap replicates: growth_rate_bs_reps, log_a0_bs_reps
growth_rate_bs_reps, log_a0_bs_reps = dcst.draw_bs_pairs_linreg(t, log_bac_area, size=10000)

# Compute confidence intervals: growth_rate_conf_int
growth_rate_conf_int = np.percentile(growth_rate_bs_reps, [2.5, 97.5])

# Print the result to the screen.
# The growth rate is the slope of log(area) versus time, so its unit is
# 1/hour (the logarithm is dimensionless), not an area per hour.
print("""
Growth rate: {0:.4f} 1/hour
95% conf int: [{1:.4f}, {2:.4f}] 1/hour
""".format(growth_rate, *growth_rate_conf_int))

示例#10

0

显示文件

文件： 09-how-does-the-current-effect-depend-on-lane-position.py 项目： mehrdad2275/datacamp-1

'''
INSTRUCTIONS

*   Compute the slope and intercept of the f_13 versus lanes line using np.polyfit().
*   Use dcst.draw_bs_pairs_linreg() to get 10,000 bootstrap replicates of the slope and intercept, storing them respectively in bs_reps_slope and bs_reps_int.
*   Use the bootstrap replicates to compute a 95% confidence interval for the slope.
*   Print the slope and 95% confidence interval to the screen. This has been done for you.
*   Using np.array(), generate x-values to use for the plot of the bootstrap lines. x should go from 1 to 8.
*   The plot is already populated with the data. Write a for loop to add 100 bootstrap lines to the plot using the keyword arguments color='red', alpha=0.2, and linewidth=0.5.
'''

# Straight-line fit of f_13 against lane number;
# np.polyfit returns the slope first, then the intercept.
slope, intercept = np.polyfit(lanes, f_13, deg=1)

# Pairs-bootstrap replicates of slope and intercept.
bs_reps_slope, bs_reps_int = dcst.draw_bs_pairs_linreg(
    lanes,
    f_13,
    size=10000,
)

# 95% interval: 2.5th and 97.5th percentiles of the slope replicates.
conf_int = np.percentile(bs_reps_slope, [2.5, 97.5])

# Report the fitted slope together with its bootstrap interval.
print("""
slope: {0:.5f} per lane
95% conf int: [{1:.5f}, {2:.5f}] per lane""".format(
    slope, conf_int[0], conf_int[1]))

# End points used to draw the regression lines.
x = np.array([1, 8])

# Plot 100 bootstrap replicate lines
for i in range(100):
    _ = plt.plot(x,

示例#11

0

显示文件

文件： test_dc_stat_think.py 项目： Schots/dc_stat_think

def test_draw_bs_pairs_linreg_edge():
    """Constant x and y must give all-NaN slope and intercept replicates."""
    n_points = 10
    x = np.ones(n_points)
    y = np.ones(n_points)

    slope, intercept = dcst.draw_bs_pairs_linreg(x, y, size=10)

    # Every replicate of both outputs has to be NaN.
    assert np.all(np.isnan(slope))
    assert np.all(np.isnan(intercept))

示例#12

0

显示文件

# Fraction of replicates at least as large as diff_means_exp.
p = np.sum(bs_reps >= diff_means_exp) / len(bs_reps)
print('p-value =', p)

# -------------------------------------------------------
# Assessing the growth rate
# -------------------------------------------------------

# Compute logarithm of the bacterial area: log_bac_area
log_bac_area = np.log(bac_area)

# Compute the slope and intercept: growth_rate, log_a0
# (np.polyfit returns the highest-degree coefficient first.)
growth_rate, log_a0 = np.polyfit(t, log_bac_area, 1)

# Draw 10,000 pairs bootstrap replicates: growth_rate_bs_reps, log_a0_bs_reps
growth_rate_bs_reps, log_a0_bs_reps = \
            dcst.draw_bs_pairs_linreg(t, log_bac_area, size=10000)

# Compute confidence intervals: growth_rate_conf_int
growth_rate_conf_int = np.percentile(growth_rate_bs_reps, [2.5, 97.5])

# Print the result to the screen.
# The growth rate is the slope of log(area) versus time, so its unit is
# 1/hour (the logarithm is dimensionless), not an area per hour.
print("""
Growth rate: {0:.4f} 1/hour
95% conf int: [{1:.4f}, {2:.4f}] 1/hour
""".format(growth_rate, *growth_rate_conf_int))

# -------------------------------------------------------
# Plotting the growth curve
# -------------------------------------------------------

# Plot data points in a semilog-y plot with axis labels