"""Parameter estimates of beak depths

Estimate the difference of the mean beak depth of the G. scandens samples from 1975 and 2012 and
report a 95% confidence interval."""

import pandas as pd
import numpy as np
from customlib import finch_beaks_df as finch
from customlib import bootstrap_repl as bt
# Load the finch samples; only the two bd_* samples are used in this script,
# the bl_* samples are unpacked but left untouched.
bd_1975, bd_2012, bl_1975, bl_2012 = finch.draw_finch_data()

# Observed statistic: change in the sample mean from 1975 to 2012.
mean_diff = np.mean(bd_2012) - np.mean(bd_1975)

# Draw 10,000 bootstrap replicates of the mean for each year, 1975 first so
# the order of random draws is the same as drawing them one after another.
bs_replicates_1975, bs_replicates_2012 = (
    bt.draw_bs_reps(sample, np.mean, 10000)
    for sample in (bd_1975, bd_2012)
)

# Element-wise difference of the replicates gives bootstrap samples of the
# difference of means.
bs_diff_replicates = bs_replicates_2012 - bs_replicates_1975

# The 2.5th and 97.5th percentiles bound the central 95% of those samples.
conf_int = np.percentile(bs_diff_replicates, [2.5, 97.5])

# Report the point estimate and its confidence interval.
print('difference of means =', mean_diff, 'mm')
print('95% confidence interval =', conf_int, 'mm')
# Example #2
0
import numpy as np
import pandas as pd
from customlib import bootstrap_repl as bt

# Load the bee sperm data set, skipping the first three rows of the file.
# NOTE(review): this is an absolute, machine-specific path; consider making it
# relative to the repository or configurable.
df = pd.read_csv(
    'C:/Users/amlan/Documents/Git Repos/Machine Learning/Neural-Networks-DataCamp/customlib/datasets/bee_sperm.csv',
    skiprows=3)

# Pick the 'AliveSperm' column for each treatment group with a single .loc
# lookup (row mask + column label) instead of chained indexing.
control = df.loc[df['Treatment'] == 'Control', 'AliveSperm']
treated = df.loc[df['Treatment'] == 'Pesticide', 'AliveSperm']

# Observed test statistic: difference in mean sperm count (control - treated).
diff_means = np.mean(control) - np.mean(treated)

# Mean of the pooled data: the common mean both groups share under the null
# hypothesis.
sperm_concat = np.concatenate((control, treated))
mean_count = np.mean(sperm_concat)

# Shift each group so that its mean equals the pooled mean while keeping its
# own spread.
control_shifted = control - np.mean(control) + mean_count
treated_shifted = treated - np.mean(treated) + mean_count

# Bootstrap replicates of the mean of each shifted group.
bs_reps_control = bt.draw_bs_reps(control_shifted, np.mean, size=10000)
bs_reps_treated = bt.draw_bs_reps(treated_shifted, np.mean, size=10000)

# Replicates of the difference of means under the null hypothesis.
bs_replicates = bs_reps_control - bs_reps_treated

# p-value: fraction of replicates at least as large as the observed
# difference. Reuses diff_means instead of recomputing the same statistic.
p = np.sum(bs_replicates >= diff_means) / len(bs_replicates)
print('p-value =', p)
import numpy as np
from customlib import bootstrap_repl as bt
# NOTE: forces_concat, force_a and force_b are not defined in this snippet;
# they are expected to exist before this point.

# Mean of the pooled force measurements.
mean_force = np.mean(forces_concat)

# Observed difference of means between the two samples (a - b).
empirical_diff_means = np.mean(force_a) - np.mean(force_b)

# Re-centre both samples on the pooled mean, keeping each sample's spread.
force_a_shifted = force_a - np.mean(force_a) + mean_force
force_b_shifted = force_b - np.mean(force_b) + mean_force

# 10,000 bootstrap replicates of the mean per shifted sample, drawn for
# sample a first and then sample b.
bs_replicates_a, bs_replicates_b = (
    bt.draw_bs_reps(shifted, np.mean, 10000)
    for shifted in (force_a_shifted, force_b_shifted)
)

# Replicates of the difference of means under the equal-means hypothesis.
bs_replicates = bs_replicates_a - bs_replicates_b

# p-value: share of replicates at least as large as the observed difference.
p = np.sum(bs_replicates >= empirical_diff_means) / len(bs_replicates)
print('p-value =', p)
# Annual rainfall measurements as a 1-D float array, one entry per year.
# NOTE(review): units are presumably millimetres -- the plots in this file
# label the mean as "(mm)" and divide the variance by 100 to get sq. cm;
# confirm against the data source.
rainfall = np.array([
    875.5, 648.2, 788.1, 940.3, 491.1, 743.5, 730.1, 686.5, 878.8, 865.6,
    654.9, 831.5, 798.1, 681.8, 743.8, 689.1, 752.1, 837.2, 710.6, 749.2,
    967.1, 701.2, 619., 747.6, 803.4, 645.6, 804.1, 787.4, 646.8, 997.1, 774.,
    734.5, 835., 840.7, 659.6, 828.3, 909.7, 856.9, 578.3, 904.2, 883.9, 740.1,
    773.9, 741.4, 866.8, 871.1, 712.5, 919.2, 927.9, 809.4, 633.8, 626.8,
    871.3, 774.3, 898.8, 789.6, 936.3, 765.4, 882.1, 681.1, 661.3, 847.9,
    683.9, 985.7, 771.1, 736.6, 713.2, 774.5, 937.7, 694.5, 598.2, 983.8,
    700.2, 901.3, 733.5, 964.4, 609.3, 1035.2, 718., 688.6, 736.8, 643.3,
    1038.5, 969., 802.7, 876.6, 944.7, 786.6, 770.4, 808.6, 761.3, 774.2,
    559.3, 674.2, 883.6, 823.9, 960.4, 877.8, 940.6, 831.8, 906.2, 866.5,
    674.1, 998.1, 789.3, 915., 737.1, 763., 666.7, 824.5, 913.8, 905.1, 667.8,
    747.4, 784.7, 925.4, 880.2, 1086.9, 764.4, 1050.1, 595.2, 855.2, 726.9,
    785.2, 948.8, 970.6, 896., 618.4, 572.4, 1146.4, 728.2, 864.2, 793.
])

# Draw 10,000 bootstrap replicates of the variance of the rainfall data and
# divide by 100 to express them in square centimetres.
# NOTE(review): plt is not imported in this snippet; it is expected to be
# matplotlib.pyplot, already in scope.
bs_replicates = bt.draw_bs_reps(rainfall, np.var, 10000) / 100

# Normalised histogram of the replicates, i.e. an estimate of the PDF of the
# variance.
_ = plt.hist(bs_replicates, density=True, bins=50)
_ = plt.ylabel('PDF')
_ = plt.xlabel('variance of annual rainfall (sq. cm)')

# Render the figure.
plt.show()
# Example #5
0
    654.9, 831.5, 798.1, 681.8, 743.8, 689.1, 752.1, 837.2, 710.6, 749.2,
    967.1, 701.2, 619., 747.6, 803.4, 645.6, 804.1, 787.4, 646.8, 997.1, 774.,
    734.5, 835., 840.7, 659.6, 828.3, 909.7, 856.9, 578.3, 904.2, 883.9, 740.1,
    773.9, 741.4, 866.8, 871.1, 712.5, 919.2, 927.9, 809.4, 633.8, 626.8,
    871.3, 774.3, 898.8, 789.6, 936.3, 765.4, 882.1, 681.1, 661.3, 847.9,
    683.9, 985.7, 771.1, 736.6, 713.2, 774.5, 937.7, 694.5, 598.2, 983.8,
    700.2, 901.3, 733.5, 964.4, 609.3, 1035.2, 718., 688.6, 736.8, 643.3,
    1038.5, 969., 802.7, 876.6, 944.7, 786.6, 770.4, 808.6, 761.3, 774.2,
    559.3, 674.2, 883.6, 823.9, 960.4, 877.8, 940.6, 831.8, 906.2, 866.5,
    674.1, 998.1, 789.3, 915., 737.1, 763., 666.7, 824.5, 913.8, 905.1, 667.8,
    747.4, 784.7, 925.4, 880.2, 1086.9, 764.4, 1050.1, 595.2, 855.2, 726.9,
    785.2, 948.8, 970.6, 896., 618.4, 572.4, 1146.4, 728.2, 864.2, 793.
])

# Take 10,000 bootstrap replicates of the mean: bs_replicates
bs_replicates = bt.draw_bs_reps(rainfall, np.mean, 10000)

# Standard error of the mean from the closed-form expression
# std / sqrt(n); np.std is used with its default ddof.
sem = np.std(rainfall) / np.sqrt(len(rainfall))
print(sem)

# Standard deviation of the bootstrap replicates, printed right after the
# analytic SEM so the two numbers can be compared.
bs_std = np.std(bs_replicates)
print(bs_std)

# Normalised histogram of the bootstrap means (an estimate of their PDF).
# NOTE(review): plt is not imported in this snippet; it is expected to be
# matplotlib.pyplot, already in scope.
_ = plt.hist(bs_replicates, bins=50, density=True)
_ = plt.xlabel('mean annual rainfall (mm)')
_ = plt.ylabel('PDF')

# Show the plot (this call was missing, so the figure was never displayed).
plt.show()
For that we could use a permutation test. The hypothesis is that the means are equal.
To perform this hypothesis test, we need to shift the two data sets so that they have the same mean and
then use bootstrap sampling to compute the difference of means."""

import pandas as pd
import numpy as np
from customlib import bootstrap_repl as bt
from customlib import finch_beaks_df as finch

# Load the finch samples; only the two bd_* samples are used in this script.
bd_1975, bd_2012, bl_1975, bl_2012 = finch.draw_finch_data()

# Observed difference of the sample means (2012 - 1975), computed from the
# loaded data instead of a pasted literal so it cannot drift out of sync with
# the data set. The value previously hard-coded here was 0.22622047244094645.
mean_diff = np.mean(bd_2012) - np.mean(bd_1975)

# Compute mean of combined data set: combined_mean
combined_mean = np.mean(np.concatenate((bd_1975, bd_2012)))

# Shift both samples so they share the combined mean (the null hypothesis of
# equal means) while keeping each sample's own spread.
bd_1975_shifted = bd_1975 - np.mean(bd_1975) + combined_mean
bd_2012_shifted = bd_2012 - np.mean(bd_2012) + combined_mean

# Get bootstrap replicates of the mean of each shifted data set
bs_replicates_1975 = bt.draw_bs_reps(bd_1975_shifted, np.mean, 10000)
bs_replicates_2012 = bt.draw_bs_reps(bd_2012_shifted, np.mean, 10000)

# Replicates of the difference of means under the null hypothesis
bs_diff_replicates = bs_replicates_2012 - bs_replicates_1975

# p-value: fraction of replicates at least as large as the observed difference
p = np.sum(bs_diff_replicates >= mean_diff) / len(bs_diff_replicates)

# Print p-value
print('p =', p)
import numpy as np
from customlib import bootstrap_repl as bt

# NOTE: force_b is not defined in this snippet; it is expected to exist
# before this point.

# Observed mean of force_b, used both to translate the data and as the
# threshold for the p-value.
observed_mean_b = np.mean(force_b)

# Translate force_b so that its mean becomes 0.55 while its spread is kept.
translated_force_b = force_b - observed_mean_b + 0.55

# 10,000 bootstrap replicates of the mean of the translated data.
bs_replicates = bt.draw_bs_reps(translated_force_b, np.mean, 10000)

# p-value: fraction of the 10,000 replicates that are no larger than the
# observed mean of force_b.
p = np.sum(bs_replicates <= observed_mean_b) / 10000

# Report the result.
print('p = ', p)