Python TrimmedMeanVar示例，thinkstats2.TrimmedMeanVar Python示例

示例#1

0

显示文件

def MakeNormalModel(arrivalDelays):
    """Plot the CDF of arrival delays together with a fitted normal model.

    This is a modified copy of the birth-weight version in analytic.py.
    The figure is written out with thinkplot.Save under the root name
    'NormalModel_arrivaldelay_model'.

    arrivalDelays: sequence of arrival delays (the x axis is labelled
                   in minutes)
    """
    # estimate parameters: trimming outliers yields a better fit
    mu, var = thinkstats2.TrimmedMeanVar(arrivalDelays, p=0.01)
    print('Mean, Var', mu, var)

    sigma = math.sqrt(var)
    print('Sigma', sigma)

    # plot the model
    # The original rendered the model over the fixed interval 0..12.5, which
    # is the birth-weight range (in lbs) inherited from analytic.py.  Arrival
    # delays do not live on that scale, so derive the range from the fitted
    # parameters instead: four standard deviations on each side of the mean.
    low = mu - 4 * sigma
    high = mu + 4 * sigma
    xs, ps = thinkstats2.RenderNormalCdf(mu, sigma, low=low, high=high)

    thinkplot.Plot(xs, ps, label='model', color='0.8')

    # plot the data
    cdf = thinkstats2.Cdf(arrivalDelays, label='data')

    thinkplot.PrePlot(1)
    thinkplot.Cdf(cdf)
    thinkplot.Save(root='NormalModel_arrivaldelay_model',
                   title='Arrival Delays',
                   xlabel='arrival delays (min)',
                   ylabel='CDF')

示例#2

0

显示文件

def PlotNormalProbability(sample, title="", ylabel=""):
    """Draw a normal probability plot of sample with a fitted model line.

    The model line has intercept equal to the trimmed mean and slope equal
    to the trimmed standard deviation, evaluated at z = -5 and z = 5.

    sample: sequence of values
    title: string title passed to thinkplot.Config
    ylabel: string y-axis label passed to thinkplot.Config
    """
    trimmed_mean, trimmed_var = thinkstats2.TrimmedMeanVar(sample, p=0.01)
    std = np.sqrt(trimmed_var)

    # straight line representing the normal model
    line_xs, line_ys = thinkstats2.FitLine([-5, 5],
                                           inter=trimmed_mean,
                                           slope=std)
    model_label = r'model $\mu$={:.2f} $\sigma$={:.2f}'.format(
        trimmed_mean, std)
    thinkplot.plot(line_xs, line_ys, color='gray', label=model_label)

    # the observed values
    zs, values = thinkstats2.NormalProbability(sample)
    thinkplot.Plot(zs, values, label="actual")

    thinkplot.Config(title=title, xlabel="z", ylabel=ylabel)
示例#3

0

显示文件

def PlotNormalModel(sample, title="", xlabel=""):
    """Plot the CDF of sample along with the CDF of a fitted normal model.

    The model uses the trimmed mean and variance of the sample (p=0.01) and
    is rendered from four standard deviations below the mean to four above.

    sample: sequence of values
    title: string title passed to thinkplot.Config
    xlabel: string x-axis label passed to thinkplot.Config
    """
    empirical = thinkstats2.Cdf(sample, label="actual")

    center, variance = thinkstats2.TrimmedMeanVar(sample, p=0.01)
    spread = np.sqrt(variance)

    lower = center - 4.0 * spread
    upper = center + 4.0 * spread
    model_xs, model_ps = thinkstats2.RenderNormalCdf(center, spread,
                                                     lower, upper)

    # data first, then the model on top of it
    thinkplot.Cdf(empirical)
    model_label = r'model $\mu$={:.2f} $\sigma$={:.2f}'.format(
        center, spread)
    thinkplot.plot(model_xs, model_ps, label=model_label)

    thinkplot.Config(title=title, xlabel=xlabel, ylabel="CDF")

示例#4

0

显示文件

def MakeNormalModel(data, label):
    """Plot the CDF of data over a normal model fitted to it.

    Prints the sample size together with the fitted mean and standard
    deviation.

    data: sequence of values
    label: string label for the empirical CDF
    """
    empirical = thinkstats2.Cdf(data, label=label)

    # trimmed estimates, using TrimmedMeanVar's default trim fraction
    mu, variance = thinkstats2.TrimmedMeanVar(data)
    sigma = np.sqrt(variance)
    print('n, mean, std', len(data), mu, sigma)

    # the model spans four standard deviations on either side of the mean
    half_width = 4 * sigma
    model_xs, model_ps = thinkstats2.RenderNormalCdf(
        mu, sigma, mu - half_width, mu + half_width)

    thinkplot.Plot(model_xs, model_ps,
                   label='model', linewidth=4, color='0.8')
    thinkplot.Cdf(empirical)

示例#5

0

显示文件

def MakeNormalPlot(weights):
    """Draw a normal probability plot of weights with a model line.

    The model line comes from thinkstats2.FitLine, given the trimmed mean
    and trimmed standard deviation, evaluated at -5 and 5.

    weights: sequence
    """
    trimmed_mean, trimmed_var = thinkstats2.TrimmedMeanVar(weights, p=0.01)
    sigma = math.sqrt(trimmed_var)

    # model line
    endpoints = [-5, 5]
    line_xs, line_ys = thinkstats2.FitLine(endpoints, trimmed_mean, sigma)
    thinkplot.Plot(line_xs, line_ys, color='0.8', label='model')

    # observed values
    zs, observed = thinkstats2.NormalProbability(weights)
    thinkplot.Plot(zs, observed, label='weights')

示例#6

0

显示文件

文件： Assignment12.1EEdmunds.py 项目： tripleee19/EDA-of-Russian-Crime-Data

def MakeNormalPlot(x):
    """Show a normal probability plot of column x of the module-level df.

    A model line (from the trimmed mean and standard deviation, evaluated
    at -4 and 4) is drawn first, then the observed values, and the figure
    is displayed with thinkplot.Show.

    x: key used to index df; also inserted into the plot title
    """
    column = df[x]

    center, variance = thinkstats2.TrimmedMeanVar(column, p=0.01)
    spread = math.sqrt(variance)

    # model line
    line_xs, line_ys = thinkstats2.FitLine([-4, 4], center, spread)
    thinkplot.Plot(line_xs, line_ys, linewidth=4, color='0.8')

    # observed values
    thinkplot.PrePlot(2)
    zs, observed = thinkstats2.NormalProbability(column)
    thinkplot.Plot(zs, observed, label='Number of Crimes')

    plot_title = 'Normal Prob Plot: {}'.format(x)
    thinkplot.Show(title=plot_title,
                   xlabel='Standard deviations from mean',
                   ylabel='Number of Crimes')

示例#7

0

显示文件

def MakeNormalModel(weights):
    """Plot the CDF of weights on top of a fitted normal model.

    Prints the sample size, trimmed mean and trimmed standard deviation.

    weights: sequence
    """
    data_cdf = thinkstats2.Cdf(weights, label='weights')

    # trimmed estimates, using TrimmedMeanVar's default trim fraction
    mu, variance = thinkstats2.TrimmedMeanVar(weights)
    sigma = math.sqrt(variance)
    print('n, mean, std', len(weights), mu, sigma)

    # render the model over mean +/- 4 standard deviations
    lower, upper = mu - 4 * sigma, mu + 4 * sigma
    model_xs, model_ps = thinkstats2.RenderNormalCdf(mu, sigma, lower, upper)

    thinkplot.Plot(model_xs, model_ps,
                   label='model',
                   linewidth=4,
                   color='0.8')
    thinkplot.Cdf(data_cdf)

示例#8

0

显示文件

def MakeNormalModel(age):
    """Plot the CDF of age on top of a fitted normal model.

    Prints the sample size, trimmed mean and trimmed standard deviation.

    age: sequence
    """
    observed_cdf = thinkstats2.Cdf(age, label='variable')

    # trimmed estimates, using TrimmedMeanVar's default trim fraction
    center, variance = thinkstats2.TrimmedMeanVar(age)
    spread = np.sqrt(variance)
    print('n, mean, std', len(age), center, spread)

    # model range: four standard deviations below and above the mean
    model_xs, model_ps = thinkstats2.RenderNormalCdf(
        center,
        spread,
        center - 4 * spread,
        center + 4 * spread,
    )

    thinkplot.Plot(model_xs, model_ps,
                   label='model', linewidth=4, color='0.8')
    thinkplot.Cdf(observed_cdf)

示例#9

0

显示文件

文件： analytic.py 项目： AxleMaxGit/python-data-science-projects

def MakeNormalPlot(weights, term_weights):
    """Save a normal probability plot comparing two sets of birth weights.

    A model line is fitted from the trimmed mean and standard deviation of
    weights; both weights and term_weights are then plotted against it.
    The figure is written with thinkplot.Save under the root name
    'analytic_birthwgt_normal'.

    weights: sequence, plotted with the label 'all live'
    term_weights: sequence, plotted with the label 'full term'
    """
    center, variance = thinkstats2.TrimmedMeanVar(weights, p=0.01)
    spread = math.sqrt(variance)

    # model line
    line_xs, line_ys = thinkstats2.FitLine([-4, 4], center, spread)
    thinkplot.Plot(line_xs, line_ys, linewidth=4, color='0.8')

    # the two observed series, in the same order as the original
    thinkplot.PrePlot(2)
    zs, observed = thinkstats2.NormalProbability(weights)
    thinkplot.Plot(zs, observed, label='all live')

    zs, observed = thinkstats2.NormalProbability(term_weights)
    thinkplot.Plot(zs, observed, label='full term')

    save_options = dict(root='analytic_birthwgt_normal',
                        title='Normal probability plot',
                        xlabel='Standard deviations from mean',
                        ylabel='Birth weight (lbs)')
    thinkplot.Save(**save_options)

示例#10

0

显示文件

文件： analytic.py 项目： AxleMaxGit/python-data-science-projects

def MakeNormalModel(weights):
    """Save a plot of the birth-weight CDF with a fitted normal model.

    Prints the trimmed mean, variance and standard deviation, then writes
    the figure with thinkplot.Save under the root name
    'analytic_birthwgt_model'.

    weights: sequence of birth weights (the x axis is labelled in lbs)
    """
    # trimming outliers before estimating yields a better fit
    center, variance = thinkstats2.TrimmedMeanVar(weights, p=0.01)
    print('Mean, Var', center, variance)

    spread = math.sqrt(variance)
    print('Sigma', spread)

    # the model, rendered over 0 to 12.5
    model_xs, model_ps = thinkstats2.RenderNormalCdf(center, spread,
                                                     low=0, high=12.5)
    thinkplot.Plot(model_xs, model_ps, label='model', color='0.8')

    # the data
    data_cdf = thinkstats2.Cdf(weights, label='data')
    thinkplot.PrePlot(1)
    thinkplot.Cdf(data_cdf)

    save_options = dict(root='analytic_birthwgt_model',
                        title='Birth weights',
                        xlabel='birth weight (lbs)',
                        ylabel='CDF')
    thinkplot.Save(**save_options)

示例#11

0

显示文件

                 ylabel='CDF',
                 loc='upper left')

#%% [markdown]
# I'll use a normal model to fit the distribution of birth weights from the NSFG.

#%%
# Read the pregnancy table and keep the non-missing total birth weights.
preg = nsfg.ReadFemPreg()
weights = preg.totalwgt_lb.dropna()

#%% [markdown]
# Here's the observed CDF and the model.  The model fits the data well except in the left tail.

#%%
# estimate parameters: trimming outliers yields a better fit
# NOTE(review): p=0.01 is presumably the fraction trimmed by TrimmedMeanVar;
# confirm against thinkstats2.
mu, var = thinkstats2.TrimmedMeanVar(weights, p=0.01)
print('Mean, Var', mu, var)

# plot the model
# The normal CDF is rendered over the fixed interval 0 to 12.5.
sigma = np.sqrt(var)
print('Sigma', sigma)
xs, ps = thinkstats2.RenderNormalCdf(mu, sigma, low=0, high=12.5)

thinkplot.Plot(xs, ps, label='model', color='0.6')

# plot the data
cdf = thinkstats2.Cdf(weights, label='data')

# The model curve above is drawn before PrePlot(1) is called for the data.
thinkplot.PrePlot(1)
thinkplot.Cdf(cdf)
thinkplot.Config(title='Birth weights',

示例#12

0

显示文件

# Without trimming: fit the normal model with the plain mean and standard
# deviation of totalwgt_lb, which is defined before this fragment.
cdf = thinkstats2.Cdf(totalwgt_lb)
mu = totalwgt_lb.mean()
sigma = totalwgt_lb.std()
# Evaluate the model at the same x values that appear in the empirical CDF.
x = cdf.Values()
y = thinkstats2.EvalNormalCdf(x, mu=mu, sigma=sigma)
# Plot the data first, then the model labelled with its parameters.
thinkplot.plot(x, cdf.Probs(x), label='Data')
thinkplot.plot(x,
               y,
               label=r'Model $\mu$={:.2f} $\sigma$={:.2f}'.format(mu, sigma))
thinkplot.Config(xlabel="weight (pounds)", ylabel="CDF")

#%%
# with trimming
# Same plot as the untrimmed version, but mu and sigma come from
# thinkstats2.TrimmedMeanVar with p=0.01.
totalwgt_lb = live.totalwgt_lb.dropna()
cdf = thinkstats2.Cdf(totalwgt_lb)
mu, var = thinkstats2.TrimmedMeanVar(totalwgt_lb, p=0.01)
sigma = np.sqrt(var)
# Evaluate the model at the same x values that appear in the empirical CDF.
x = cdf.Values()
y = thinkstats2.EvalNormalCdf(x, mu=mu, sigma=sigma)
# Plot the data first, then the model labelled with its parameters.
thinkplot.plot(x, cdf.Probs(x), label='Data')
thinkplot.plot(x,
               y,
               label=r'Model $\mu$={:.2f} $\sigma$={:.2f}'.format(mu, sigma))
thinkplot.Config(xlabel="weight (pounds)", ylabel="CDF")

#%% [markdown]
# ## 5.3 Normal probability plot

#%%
n = 1000
thinkplot.PrePlot(3)

示例#13

0

显示文件

    xs, ps = thinkstats2.RenderNormalCdf(mu=mu, sigma=sigma, 
                                               low=-1.0, high=4.0)
    label = r'$\mu=%g$, $\sigma=%g$' % (mu, sigma)
    thinkplot.Plot(xs, ps, label=label)

thinkplot.Config(title='Normal CDF', xlabel='x', ylabel='CDF',
                 loc='upper left')


# In[71]:



# OBSERVED CDF AND MODEL
# estimate parameters: trimming outliers yields a better fit
mu, var = thinkstats2.TrimmedMeanVar(df.tripduration, p=0.01)
print('Mean, Var', mu, var)

# plot the model
sigma = np.sqrt(var)
print('Sigma', sigma)
# The fixed range 0..12.5 used previously is the birth-weight range (in lbs)
# from the example this cell was copied from.  Trip durations are not on that
# scale, so render the model over mean +/- 4 standard deviations instead.
xs, ps = thinkstats2.RenderNormalCdf(mu, sigma,
                                     low=mu - 4 * sigma,
                                     high=mu + 4 * sigma)
thinkplot.Plot(xs, ps, label='model', color='0.6')

# plot the data
cdf = thinkstats2.Cdf(df.tripduration, label='data')

thinkplot.PrePlot(1)
thinkplot.Cdf(cdf)
thinkplot.Config(title='Trip Duration',
                 xlabel='Trip Duration (Minutes)',

示例#14

0

显示文件

# One way to read this is that about 95% of films gross less than half a billion dollars.
# 
# The steepest portion of the graph is the part closest to zero. Therefore, what this graph tells us is that most films don’t gross a lot of money at the box office, but a very small percentage gross an extraordinary amount of money.
# 
# This doesn’t directly address the question of whether a film’s success can be predicted with a high level of certainty, but it does tell us that what is most common among all the films is unlikely to be what best explains film success.

# I have chosen to use the normal distribution analytical model to explore the budget variable. Based on what we learned from the last graph, that the vast majority of films gross close to nothing, we also know that the distribution of revenue is extremely skewed. Therefore, any reliable predictor of an extremely skewed distribution must itself be extremely skewed.
# 
# I previously hypothesized that for audiences to feel a movie is worth seeing in the theater, it must be a large-scale film with explosions and special effects, both of which significantly drive up a film's budget. If this holds true, I expect the normal distribution not to be a good model for the budget variable.
# 

# In[442]:


# Trimmed mean and variance of the film budgets (p=0.01), printed for
# inspection.
mu, var = thinkstats2.TrimmedMeanVar(filmsdata.budget, p=0.01)
print('Mean, Var', mu, var)


# In[443]:


# Largest budget value; as a bare expression it is only displayed when it is
# the last statement of a notebook cell.
max(filmsdata.budget)


# In[444]:


# Smallest budget value, shown the same way.
min(filmsdata.budget)