Пример #1
0
def EstimateGoals(lam, m):
    """Run ``SimulateGame(lam)`` ``m`` times and report on the results.

    Prints the RMSE and mean error of the simulated values relative to
    ``lam`` (both computed by the ``estimation`` module), builds a CDF of
    the values, draws a vertical bar at its 5th and 95th percentiles,
    plots the CDF and hands the figure to ``thinkplot.SaveFormat`` under
    the root name ``Q9_sampling_dist`` with format ``png``.

    Args:
        lam: value forwarded to ``SimulateGame`` and used as the reference
            for the error metrics and in the plot title.
        m: number of times ``SimulateGame`` is called.

    Returns:
        None.
    """
    # One SimulateGame result per iteration, collected in call order.
    estimates = [SimulateGame(lam) for _ in range(m)]

    print('RMSE of Goals: ', estimation.RMSE(estimates, lam))
    print('Mean Error of Goals: ', estimation.MeanError(estimates, lam))

    sampling_cdf = thinkstats2.Cdf(estimates)
    lower = sampling_cdf.Percentile(5)
    upper = sampling_cdf.Percentile(95)

    # Light-grey vertical bars spanning y = 0..1 at both percentile bounds.
    for bound in (lower, upper):
        thinkplot.Plot([bound, bound], [0, 1], color='0.8', linewidth=3)

    thinkplot.Cdf(sampling_cdf)

    plot_title = 'Sampling Distribution, lam = ' + str(lam)
    thinkplot.SaveFormat(
        root='Q9_sampling_dist',
        fmt='png',
        xlabel='Goals',
        ylabel='CumProb',
        title=plot_title,
    )
Пример #2
0
import nsfg
import thinkstats2
import thinkplot
import probability

# Load the respondent data and build a PMF, labelled 'actual', from its
# `numkdhh` column.
p = nsfg.ReadFemResp()
act_pmf = thinkstats2.Pmf(p.numkdhh, label='actual')
print(act_pmf)

# Derive a second PMF, labelled 'observed', from the actual one using the
# project's probability.BiasPmf helper.
# NOTE(review): whether BiasPmf copies or mutates `act_pmf` is not visible
# here -- confirm before reordering any of these statements.
bias_pmf = probability.BiasPmf(act_pmf, label='observed')
print(bias_pmf)

# Report the mean of each distribution for comparison.
print('Mean number of children, actual: ', act_pmf.Mean())
print('Mean number of children, biased: ', bias_pmf.Mean())

# Plot both PMFs with a single thinkplot.Pmfs call, then hand the figure to
# thinkplot.SaveFormat with root name 'act_vs_biased' and format 'png'.
fig = thinkplot.Pmfs([act_pmf, bias_pmf])
# Disabled interactive display, kept for reference:
#thinkplot.show(xlabel='No. of Children', ylabel='pmf')
thinkplot.SaveFormat(root = 'act_vs_biased',
               fmt = 'png',
               xlabel = 'No. of Children',
               ylabel = 'pmf')
Пример #3
0
# Seed the random state through thinkstats2 (presumably so that repeated
# runs give the same simulated samples -- confirm against thinkstats2).
thinkstats2.RandomSeed(1)

# Sample sizes 10, 20, ..., 1000.  np.arange excludes its stop value, so the
# stop must be 1001: with a stop of 1000 the last size is 990 and the
# "n = 1000" report below would silently describe n = 990 instead.
ns = np.arange(10, 1001, 10)

# Per-sample-size results, kept in the same order as `ns`.
stderrs = []
cis = []
cdfs = []
for n in ns:
    cdf, stderr, ci = Simulate_Sample(2, n, m=1000)
    cdfs.append(cdf)
    stderrs.append(stderr)
    cis.append(ci)

# ns[0] == 10, so index 0 holds the n = 10 results.
print('Standard error, n = 10: ', stderrs[0])
print('Confidence interval, n = 10: ', cis[0])

# Look up the position of n = 100 once and reuse it for both reports.
idx_100 = list(ns).index(100)
print('Standard error, n = 100: ', stderrs[idx_100])
print('Confidence interval, n = 100: ', cis[idx_100])

# ns[-1] == 1000 now that the range includes its upper bound.
print('Standard error, n = 1000: ', stderrs[-1])
print('Confidence interval, n = 1000: ', cis[-1])

# Plot the CDF obtained for the smallest sample size (n = 10).
thinkplot.Cdf(cdfs[0])
thinkplot.SaveFormat(root='Q8_cdf', fmt='png', xlabel='x', ylabel='CumProb')

# Plot standard error as a function of sample size.
# NOTE(review): nothing here clears the figure between the two plots --
# confirm that this project's SaveFormat resets it, otherwise the CDF above
# will also appear in Q8_stderr.
thinkplot.Plot(ns, stderrs)
thinkplot.SaveFormat(root='Q8_stderr',
                     fmt='png',
                     xlabel='n',
                     ylabel='Standard Error')
Пример #4
0
import DataSet
import pdb

# Load the pickled data set; `df` is its data frame and `varnames` the list
# of variables to plot.
H155 = DataSet.DataSet('h155.pkl')

df = H155.df
errorlist = []  # (variable name, exception) for every plot that failed
totalvars = len(H155.varnames)
for index, var in enumerate(H155.varnames):

    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('\nPlotting %s, variable %d out of %d.' % (var, index + 1, totalvars))

    # Keep only strictly positive values (presumably the non-positive ones
    # are the survey's error / missing codes -- confirm against the codebook).
    thisplot = df[df[var] > 0][var]
    try:
        thishist = ts2.Cdf(thisplot)
        tplt.Cdf(thishist)
        tplt.Config(title=var, ylabel='Probability', xlabel='Response')
    except Exception as e:
        # Best effort: record the failure and carry on with the next variable.
        print('ERROR CREATING %s' % var)
        errorlist.append((var, e))
    else:
        tplt.SaveFormat('graphs/cdftests2/%s' % var, 'png')
    finally:
        # Always clear the figure, including after a failure, so a partially
        # drawn plot cannot leak into the next variable's graph.
        tplt.Clf()

print(errorlist)

# Write one "<variable>: <exception>" line per failure.  Each entry is a
# 2-tuple, so it has to be unpacked into two format fields: the previous
# '"%s\n" % err' raised TypeError on the first logged error.  The log is
# plain text, hence text mode.
with open('graphs/cdftests2/errorlog.txt', 'w') as wrfile:
    for failed_var, exc in errorlist:
        wrfile.write('%s: %r\n' % (failed_var, exc))
Пример #5
0
import first
import numpy as np

# Load the pregnancy frames and keep only the live-birth rows that have both
# the mother's age and the total birth weight.
live, firsts, others = first.MakeFrames()
live = live.dropna(subset=['agepreg', 'totalwgt_lb'])

# Correlation between age and birth weight, as computed by thinkstats2's
# Corr and SpearmanCorr.
rho = thinkstats2.Corr(live.agepreg, live.totalwgt_lb)
rho_s = thinkstats2.SpearmanCorr(live.agepreg, live.totalwgt_lb)
print('Pearson\'s Correlation, Mother\'s age and Birth weight: ', rho)
print('Spearman\'s Rank Correlation, Mother\'s age and Birth weight: ', rho_s)

# Scatter plot of the raw data.  LEGEND is a thinkplot module attribute
# (presumably its legend on/off switch -- confirm); it is turned off here
# because the scatter has no labelled series.
thinkplot.LEGEND = False
thinkplot.Scatter(live.agepreg, live.totalwgt_lb)
# The x label previously read "Mothers's age" (typo); corrected below.
thinkplot.SaveFormat(root='age_weight_scatter',
                     fmt='png',
                     xlabel='Mother\'s age',
                     ylabel='Birth weight')

# Percentile curves: bin the ages in 2.5-year steps from 10 up to (but not
# including) 45, group the rows by the bin index np.digitize assigns, and
# compute a birth-weight CDF per group.
thinkplot.LEGEND = True
bins = np.arange(10, 45, 2.5)
indices = np.digitize(live.agepreg, bins)
groups = live.groupby(indices)
ages = [group.agepreg.mean() for _, group in groups]
cdfs = [thinkstats2.Cdf(group.totalwgt_lb) for _, group in groups]

# One labelled line per percentile, plotted against the mean age per group.
for percent in [75, 50, 25]:
    weights = [cdf.Percentile(percent) for cdf in cdfs]
    label = '%dth' % percent
    thinkplot.Plot(ages, weights, label=label)
#thinkplot.Show(xlabel = 'Mother\'s age', ylabel = 'Birth weight')
thinkplot.SaveFormat(root='age_weight_percentiles',
                     fmt='png',
Пример #6
0
import nsfg
import thinkstats2
import thinkplot

import random
import pandas as pd
import numpy as np


def rand_list(n):
    """Return a list of ``n`` values, each produced by ``random.random()``."""
    return [random.random() for _draw in range(n)]


# Draw 1000 values with rand_list and build both a PMF and a CDF from the
# same sample.
rands = rand_list(1000)
pmf = thinkstats2.Pmf(rands)
cdf = thinkstats2.Cdf(rands)

# Plot the PMF and hand the figure to thinkplot.SaveFormat with root name
# 'Q4_2pmf' and format 'png'.
thinkplot.Pmf(pmf)
# Disabled interactive display, kept for reference:
#thinkplot.Show(xlabel='x', ylabel='pmf')
thinkplot.SaveFormat(root='Q4_2pmf', fmt='png', xlabel='x', ylabel='Prob')

# Plot the CDF and hand the figure to thinkplot.SaveFormat with root name
# 'Q4_2cdf' and format 'png'.
# NOTE(review): nothing here clears the figure between the two plots --
# confirm that this project's SaveFormat resets it.
thinkplot.Cdf(cdf)
# Disabled interactive display, kept for reference:
#thinkplot.Show(xlabel = 'x', ylabel = 'cdf')
thinkplot.SaveFormat(root='Q4_2cdf', fmt='png', xlabel='x', ylabel='CumProb')