Python SubPlot示例，thinkplot.SubPlot Python示例

示例#1

0

显示文件

def main():
    """Show four views of the data in 'mystery0.dat' on a 2x2 grid.

    Panels, in order: the raw PMF drawn as a histogram, the PMF of the
    binned data, a PMF evaluated from the estimated PDF, and the CDF.
    """
    data = read_file('mystery0.dat')

    # distributions built directly from the raw values
    naive_pmf = thinkstats2.MakePmfFromList(data)
    data_cdf = thinkstats2.MakeCdfFromList(data)

    # estimated PDF, evaluated at 101 evenly spaced points over the data range
    estimated = thinkstats2.EstimatedPdf(data)
    low = min(data)
    high = max(data)
    smooth_pmf = estimated.MakePmf(numpy.linspace(low, high, 101))

    # PMF of whatever BinData returns for the same range
    binned_pmf = thinkstats2.MakePmfFromList(BinData(data, low, high, 51))

    # (drawing function, distribution, extra drawing options, panel title)
    panels = [
        (thinkplot.Hist, naive_pmf, {'width': 0.1}, 'Naive Pmf'),
        (thinkplot.Hist, binned_pmf, {}, 'Binned Hist'),
        (thinkplot.Pmf, smooth_pmf, {}, 'KDE PDF'),
        (thinkplot.Cdf, data_cdf, {}, 'CDF'),
    ]
    for position, (draw, dist, options, title) in enumerate(panels, start=1):
        thinkplot.SubPlot(2, 2, position)
        draw(dist, **options)
        thinkplot.Config(title=title)

    thinkplot.Show()

示例#2

0

显示文件

def MakeFigures(df):
    """Save two two-panel figures for adult weight and log10 adult weight.

    'brfss_weight' is drawn with MakeNormalModel and 'brfss_weight_normal'
    with MakeNormalPlot; in both, the first panel uses the weights and the
    second panel uses their base-10 logarithm.

    df: DataFrame with a wtkg2 column
    """
    weights = df.wtkg2.dropna()
    log_weights = np.log10(weights)

    # (drawing helper, output root, labels for panel 1, labels for panel 2)
    figures = [
        (MakeNormalModel, 'brfss_weight',
         dict(xlabel='adult weight (kg)', ylabel='CDF'),
         dict(xlabel='adult weight (log10 kg)')),
        (MakeNormalPlot, 'brfss_weight_normal',
         dict(xlabel='z', ylabel='weights (kg)'),
         dict(xlabel='z', ylabel='weights (log10 kg)')),
    ]

    for draw, root, linear_labels, log_labels in figures:
        # linear scale in the first panel
        thinkplot.PrePlot(cols=2)
        draw(weights)
        thinkplot.Config(**linear_labels)

        # log scale in the second panel
        thinkplot.SubPlot(2)
        draw(log_weights)
        thinkplot.Config(**log_labels)

        thinkplot.Save(root=root)

示例#3

0

显示文件

文件： scatter.py 项目： avinashalapati09/dsc530

def MakeFigures(df):
    """Save two two-panel figures of height versus weight.

    'scatter1': the SampleRows(df, 5000) sample plotted without jitter and
    then with jitter.  'scatter2': the jittered sample drawn with
    alpha=0.1, next to a hexbin plot of the jittered full data.

    df: DataFrame handed to SampleRows and GetHeightWeight
    """
    jitter = dict(hjitter=1.3, wjitter=0.5)
    subset = thinkstats2.SampleRows(df, 5000)

    # figure 1, panel 1: the sample as-is
    thinkplot.PrePlot(cols=2)
    plain_heights, plain_weights = GetHeightWeight(subset)
    ScatterPlot(plain_heights, plain_weights)

    # figure 1, panel 2: the same sample with jitter
    thinkplot.SubPlot(2)
    jit_heights, jit_weights = GetHeightWeight(subset, **jitter)
    ScatterPlot(jit_heights, jit_weights)

    thinkplot.Save(root='scatter1')

    # figure 2, panel 1: jittered sample, now with transparency
    thinkplot.PrePlot(cols=2)
    ScatterPlot(jit_heights, jit_weights, alpha=0.1)

    # figure 2, panel 2: hexbin of the jittered full data
    thinkplot.SubPlot(2)
    all_heights, all_weights = GetHeightWeight(df, **jitter)
    HexBin(all_heights, all_weights)
    thinkplot.Save(root='scatter2')

示例#4

0

显示文件

文件： chap8ex.py 项目： seppomerimaa/ThinkStats2

def ex3():
    """Simulate games and plot how the RMSE of the totals behaves.

    Draws a 2x2 grid: a histogram of 1000 simulated totals for lam=4,
    their CDF with the 5th and 95th percentiles marked, RMSE as a function
    of lam, and RMSE as a function of the number of simulated games.
    """
    def mark_x(x, height=1):
        """Draw a thick vertical line at x, from 0 up to height."""
        thinkplot.Plot([x, x], [0, height], color='0.8', linewidth=3)

    true_lam = 4
    totals = [SimulateGame(lam=true_lam) for _ in range(1000)]
    print('RMSE', RMSE(totals, true_lam))

    totals_hist = thinkstats2.Hist(totals)
    totals_cdf = thinkstats2.Cdf(totals)

    thinkplot.PrePlot(rows=2, cols=2)

    thinkplot.SubPlot(1)
    thinkplot.Hist(totals_hist)

    thinkplot.SubPlot(2)
    thinkplot.Cdf(totals_cdf)
    for percent in (5, 95):
        mark_x(totals_cdf.Percentile(percent))

    # lambda vs. rmse
    # (original author's note: rmse goes up as lambda goes up)
    thinkplot.SubPlot(3)
    rates = range(1, 15)
    rate_errors = []
    for rate in rates:
        games = [SimulateGame(lam=rate) for _ in range(1000)]
        rate_errors.append(RMSE(games, rate))
    thinkplot.Plot(rates, rate_errors)

    # m vs. rmse
    # (original author's note: rmse may go down very slowly as m goes up,
    # but it is not at all clear that this is really the case)
    thinkplot.SubPlot(4)
    sizes = np.arange(10, 1000, 10)
    size_errors = []
    for size in sizes:
        games = [SimulateGame() for _ in range(size)]
        size_errors.append(RMSE(games, 4))
    thinkplot.Plot(sizes, size_errors)

    thinkplot.show()

示例#5

0

显示文件

文件： 9_HO1.py 项目： fullern1/previouscode

def MakePdfs(greq, less):
    """Show the estimated PDFs of birth weight for two groups side by side.

    greq: DataFrame with a totalwgt_lb column, labelled 'greater/equal to 30'
    less: DataFrame with a totalwgt_lb column, labelled 'less than 30'
    """
    # build both PDFs before any plotting starts
    pdfs = [thinkstats2.EstimatedPdf(group.totalwgt_lb.dropna())
            for group in (greq, less)]
    labels = ['greater/equal to 30', 'less than 30']

    thinkplot.PrePlot(rows=1, cols=2)
    for position, (pdf, label) in enumerate(zip(pdfs, labels), start=1):
        thinkplot.SubPlot(position)
        thinkplot.Pdf(pdf, label=label)
        thinkplot.Config(xlabel='Birth weight (lbs)', ylabel='PDF')
    thinkplot.Show()

示例#6

0

显示文件

def PlotRemainingLifetime(sf1, sf2):
    """Save a two-panel figure ('survival6') of remaining lifetimes.

    The first panel plots sf1.RemainingLifetime() with its default
    arguments; the second plots sf2.RemainingLifetime using the 50th
    percentile as the summary function and np.inf as the filler.

    sf1: SurvivalFunction for pregnancy length
    sf2: SurvivalFunction for age at first marriage
    """
    def median(pmf):
        """Return the 50th percentile of pmf."""
        return pmf.Percentile(50)

    # panel 1: pregnancy length
    thinkplot.PrePlot(cols=2)
    pregnancy_remaining = sf1.RemainingLifetime()
    thinkplot.Plot(pregnancy_remaining)
    thinkplot.Config(
        title='remaining pregnancy length',
        xlabel='weeks',
        ylabel='mean remaining weeks',
    )

    # panel 2: age at first marriage
    thinkplot.SubPlot(2)
    marriage_remaining = sf2.RemainingLifetime(filler=np.inf, func=median)
    thinkplot.Plot(marriage_remaining)
    thinkplot.Config(
        title='years until first marriage',
        ylim=[0, 15],
        xlim=[11, 31],
        xlabel='age (years)',
        ylabel='median remaining years',
    )

    thinkplot.Save(root='survival6', formats=FORMATS)

示例#7

0

显示文件

文件： hinc.py 项目： seppomerimaa/ThinkStats2

def main():
    """Plot the CDF of the 'ps' column on two scales, then print the data.

    The first panel uses a log x-axis and the second uses the 'pareto'
    transform; each panel is configured with the scale options returned by
    thinkplot.Cdf.
    """
    df = ReadData()
    cdf = thinkstats2.Cdf(df['ps'])

    # (panel title, keyword options for thinkplot.Cdf)
    panels = [
        ('logx', {'xscale': 'log'}),
        ('pareto', {'transform': 'pareto'}),
    ]

    thinkplot.PrePlot(rows=1, cols=2)
    for position, (title, cdf_options) in enumerate(panels, start=1):
        thinkplot.SubPlot(position)
        scale = thinkplot.Cdf(cdf, **cdf_options)
        thinkplot.Config(title=title, **scale)

    thinkplot.show(legend=False)

    print(df)

示例#8

0

显示文件

文件： analytic.py 项目： wu12345/ThinkStats2

def MakeBabyBoom():
    """Plot interarrival times between births and compare them to a model.

    Saves two figures:

    * 'analytic_interarrivals': the CDF of the interarrival times next to
      their CCDF on a log-y scale.
    * 'analytic_interarrivals_model': the CCDF of the data together with
      the CCDF of a random exponential sample of the same size.
    """
    # compute the interarrival times
    df = ReadBabyBoom()
    diffs = df.minutes.diff()
    cdf = thinkstats2.Cdf(diffs, label='actual')

    thinkplot.PrePlot(cols=2)
    thinkplot.Cdf(cdf)
    thinkplot.Config(xlabel='minutes', ylabel='CDF', legend=False)

    thinkplot.SubPlot(2)
    thinkplot.Cdf(cdf, complement=True)
    thinkplot.Config(xlabel='minutes',
                     ylabel='CCDF',
                     yscale='log',
                     legend=False)

    thinkplot.Save(root='analytic_interarrivals')

    # Rate of the exponential model, in events per minute so that it matches
    # the unit of the data (df.minutes).  The constants presumably mean
    # 44 births in 24 hours -- TODO confirm against the data set.  The
    # previous expression, 44 / 24 * 60.0, multiplied by 60 instead of
    # dividing and produced a rate of 110 per minute.  random.expovariate
    # expects 1 / (desired mean), i.e. the rate.
    n = len(diffs)
    lam = 44.0 / (24 * 60)
    sample = [random.expovariate(lam) for _ in range(n)]
    model = thinkstats2.Cdf(sample, label='model')

    thinkplot.PrePlot(2)
    thinkplot.Cdfs([cdf, model], complement=True)
    thinkplot.Save(root='analytic_interarrivals_model',
                   title='Time between births',
                   xlabel='minutes',
                   ylabel='CCDF',
                   yscale='log')

示例#9

0

显示文件

def MakeHists(live):
    """Save a two-panel figure of the ages in live.agepreg.

    The ages are rounded down with np.floor and counted in a Hist; the
    first panel draws it with thinkplot.Hist and the second draws the same
    object with thinkplot.Pmf.

    live: DataFrame with an agepreg column
    """
    age_hist = thinkstats2.Hist(np.floor(live.agepreg), label='agepreg')

    thinkplot.PrePlot(2, cols=2)

    # panel 1: bars
    thinkplot.SubPlot(1)
    thinkplot.Hist(age_hist)
    thinkplot.Config(
        xlabel='years',
        ylabel='frequency',
        axis=[0, 45, 0, 700],
    )

    # panel 2: the same counts drawn by thinkplot.Pmf
    thinkplot.SubPlot(2)
    thinkplot.Pmf(age_hist)

    thinkplot.Save(
        root='probability_agepreg_hist',
        xlabel='years',
        axis=[0, 45, 0, 700],
    )

示例#10

0

显示文件

文件： test_models.py 项目： RachelONelson/DSC530

def main(script, filename='mystery0.dat'):
    """Show the data in a file on six different scales (2x3 grid).

    script: not used by this function
    filename: file passed to ReadFile; also used as the label of panel 1
    """
    data = ReadFile(filename)
    cdf = thinkstats2.Cdf(data)

    def scaled_panel(position, title, ylabel, **cdf_options):
        """Draw the CDF with cdf_options and apply the scale it returns."""
        thinkplot.SubPlot(position)
        scale = thinkplot.Cdf(cdf, color='C0', **cdf_options)
        thinkplot.Config(title=title, ylabel=ylabel, **scale)

    thinkplot.PrePlot(num=6, rows=2, cols=3)

    # panel 1: plain CDF; the returned scale is not applied here
    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf, color='C0', label=filename)
    thinkplot.Config(title='CDF on linear scale', ylabel='CDF')

    scaled_panel(2, 'CDF on log-x scale', 'CDF', xscale='log')
    scaled_panel(3, 'CCDF on log-y scale', 'log CCDF',
                 transform='exponential')

    # panel 4: normal probability plot
    thinkplot.SubPlot(4)
    normal_xs, data_ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(normal_xs, data_ys, color='C0')
    thinkplot.Config(
        title='Normal probability plot',
        xlabel='random normal',
        ylabel='data',
    )

    scaled_panel(5, 'CCDF on log-log scale', 'log CCDF', transform='pareto')
    scaled_panel(6, 'CCDF on loglog-y log-x scale', 'log log CCDF',
                 transform='weibull')

    thinkplot.Show(legend=False)

示例#11

0

显示文件

def main():
    """Show the CDF of 'mystery0.dat' under six views on a 2x3 grid.

    Panels: linear CDF, log-x CDF, 'exponential' transform, normal
    probability plot, 'pareto' transform and 'weibull' transform.
    """
    data = read_file('mystery0.dat')
    cdf = thinkstats2.MakeCdfFromList(data)

    def scaled_panel(position, title, **cdf_options):
        """Draw the CDF with cdf_options and apply the scale it returns."""
        thinkplot.SubPlot(2, 3, position)
        scale = thinkplot.Cdf(cdf, **cdf_options)
        thinkplot.Config(title=title, **scale)

    # panel 1: plain CDF; the returned scale is not applied here
    thinkplot.SubPlot(2, 3, 1)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='linear')

    scaled_panel(2, 'logx', xscale='log')
    scaled_panel(3, 'expo', transform='exponential')

    # panel 4: normal probability plot
    thinkplot.SubPlot(2, 3, 4)
    normal_xs, data_ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(normal_xs, data_ys)
    thinkplot.Config(title='normal')

    scaled_panel(5, 'pareto', transform='pareto')
    scaled_panel(6, 'weibull', transform='weibull')

    thinkplot.Show()

示例#12

0

显示文件

def main(script, filename='mystery0.dat'):
    """Show the CDF of the data in a file under six views (2x3 grid).

    script: not used by this function
    filename: file passed to ReadFile
    """
    data = ReadFile(filename)
    cdf = thinkstats2.Cdf(data)

    # (panel number, title, keyword options for thinkplot.Cdf) for the
    # panels that apply the scale returned by thinkplot.Cdf
    before_normal = [
        (2, 'logx', {'xscale': 'log'}),
        (3, 'expo', {'transform': 'exponential'}),
    ]
    after_normal = [
        (5, 'pareto', {'transform': 'pareto'}),
        (6, 'weibull', {'transform': 'weibull'}),
    ]

    def draw_scaled(panels):
        """Draw each listed panel and configure it with its returned scale."""
        for position, title, cdf_options in panels:
            thinkplot.SubPlot(position)
            scale = thinkplot.Cdf(cdf, **cdf_options)
            thinkplot.Config(title=title, **scale)

    thinkplot.PrePlot(rows=2, cols=3)

    # panel 1: plain CDF; the returned scale is not applied here
    thinkplot.SubPlot(1)
    thinkplot.Cdf(cdf)
    thinkplot.Config(title='linear')

    draw_scaled(before_normal)

    # panel 4: normal probability plot
    thinkplot.SubPlot(4)
    normal_xs, data_ys = thinkstats2.NormalProbability(data)
    thinkplot.Plot(normal_xs, data_ys)
    thinkplot.Config(title='normal')

    draw_scaled(after_normal)

    thinkplot.Show(legend=False)

示例#13

0

显示文件

文件： project.py 项目： fullern1/previouscode

def MakeHists(male, female):
    """Show histograms of the alcwknd column for two groups side by side.

    male: DataFrame with an alcwknd column, drawn in the first panel
    female: DataFrame with an alcwknd column, drawn in the second panel
    """
    # (group, upper limit of the y-axis, panel title)
    panels = [
        (male, 600, 'Weekend Alcohol Consumption for Men'),
        (female, 1200, 'Weekend Alcohol Consumption for Women'),
    ]

    thinkplot.PrePlot(rows=1, cols=2)
    for position, (group, y_max, title) in enumerate(panels, start=1):
        counts = thinkstats2.Hist(group.alcwknd)
        thinkplot.SubPlot(position)
        thinkplot.Config(
            axis=[0, 800, 0, y_max],
            ylabel='Number of people',
            xlabel='Alcohol consumed (grams)',
            title=title,
        )
        thinkplot.Hist(counts, alpha=1)
    thinkplot.Show()

示例#14

0

显示文件

文件： populations.py 项目： UnderPaidMathematician/ThinkStats2

def MakeFigures():
    """Plot the distribution of populations in several forms.

    Saves 'populations_pareto' (CCDF of log10 population on a log-y scale
    with a Pareto model) and 'populations_normal' (CDF of log10 population
    with a normal model, next to a normal probability plot).

    Notes carried over from the original author: on a log-log scale the
    tail of the CCDF looks like a straight line, which suggests a Pareto
    distribution, but that turns out to be misleading.  On a log-x scale
    the distribution has the characteristic sigmoid of a lognormal
    distribution, and the normal probability plot of log(sizes) confirms
    that the data fit the lognormal model very well.  Many phenomena that
    have been described with Pareto models can be described as well, or
    better, with lognormal models.
    """
    sizes = ReadData()
    print('Number of cities/towns', len(sizes))

    log_sizes = np.log10(sizes)
    size_cdf = thinkstats2.Cdf(sizes, label='data')
    log_cdf = thinkstats2.Cdf(log_sizes, label='data')

    # figure 1: Pareto model against the CCDF of the log10 sizes
    pareto_xs, pareto_ps = thinkstats2.RenderParetoCdf(
        xmin=5000, alpha=1.4, low=0, high=1e7)
    thinkplot.Plot(np.log10(pareto_xs), 1 - pareto_ps,
                   label='model', color='0.8')

    thinkplot.Cdf(log_cdf, complement=True)
    thinkplot.Config(
        xlabel='log10 population',
        ylabel='CCDF',
        yscale='log',
    )
    thinkplot.Save(root='populations_pareto')

    # figure 2, panel 1: normal model against the CDF of the log10 sizes
    thinkplot.PrePlot(cols=2)

    mu = log_sizes.mean()
    sigma = log_sizes.std()
    normal_xs, normal_ps = thinkstats2.RenderNormalCdf(
        mu, sigma, low=0, high=8)
    thinkplot.Plot(normal_xs, normal_ps, label='model', color='0.8')

    thinkplot.Cdf(log_cdf)
    thinkplot.Config(xlabel='log10 population', ylabel='CDF')

    # figure 2, panel 2: normal probability plot of the log10 sizes
    thinkplot.SubPlot(2)
    thinkstats2.NormalProbabilityPlot(log_sizes, label='data')
    thinkplot.Config(
        xlabel='z',
        ylabel='log10 population',
        xlim=[-5, 5],
    )

    thinkplot.Save(root='populations_normal')

示例#15

0

显示文件

def pmf_stuff(width, x_low, x_high, third, pmf_one, pmf_two, label,
              y_axis_scale):
    """Show two PMFs as side-by-side bars and as step functions.

    width: bar width for the bar panel
    x_low, x_high, third, y_axis_scale: the four values of the axis list
        [x_low, x_high, third, y_axis_scale] applied to both panels
    pmf_one, pmf_two: the two distributions to compare
    label: x-axis label for both panels
    """
    limits = [x_low, x_high, third, y_axis_scale]
    shared = dict(xlabel=label, ylabel='PMF', axis=limits)

    # panel 1: bars, aligned to opposite sides so they sit next to each other
    thinkplot.PrePlot(2, cols=2)
    thinkplot.Hist(pmf_one, align='right', width=width)
    thinkplot.Hist(pmf_two, align='left', width=width)
    thinkplot.Config(**shared)

    # panel 2: step functions
    thinkplot.PrePlot(2)
    thinkplot.SubPlot(2)
    thinkplot.Pmfs([pmf_one, pmf_two])
    thinkplot.Config(**shared)
    thinkplot.Show()

示例#16

0

显示文件

文件： Chap3PMF.py 项目： pansh94/Maths_Stats

def plot_bar_step(first_pmf, other_pmf):
    """Show two PMFs as a bar graph and as a step graph, side by side.

    PrePlot is called with cols=2 to lay the two graphs out in one row.

    first_pmf, other_pmf: the two distributions to compare
    """
    bar_width = 0.5
    limits = [27, 46, 0, 0.6]

    # bar graph
    thinkplot.PrePlot(2, cols=2)
    for pmf, side in ((first_pmf, "left"), (other_pmf, "right")):
        thinkplot.Hist(pmf, align=side, width=bar_width)
    thinkplot.Config(xlabel="weeks", ylabel="probability", axis=limits)

    # step graph
    thinkplot.PrePlot(2)
    thinkplot.SubPlot(2)
    thinkplot.Pmfs([first_pmf, other_pmf])
    thinkplot.Show(xlabel="weeks", axis=[27, 46, 0, 0.6])

示例#17

0

显示文件

def NormalPlotSamples(samples, plot=1, ylabel=''):
    """Draw one normal probability plot per sample in consecutive subplots.

    samples: iterable of (n, sample) pairs; n is shown in the panel title
    plot: subplot number used for the first sample
    ylabel: y-axis label applied to every panel
    """
    for offset, (n, sample) in enumerate(samples):
        thinkplot.SubPlot(plot + offset)
        thinkstats2.NormalProbabilityPlot(sample)

        # no legend and no tick marks on any panel
        panel_options = dict(
            title='n=%d' % n,
            legend=False,
            xticks=[],
            yticks=[],
            ylabel=ylabel,
        )
        thinkplot.Config(**panel_options)

示例#18

0

显示文件

文件： timeseries.py 项目： crskbel-ca/PythonBayesianStatistics

def PlotDailies(dailies):
    """Save a three-row figure of daily prices, one row per entry.

    dailies: map from name to DataFrame with a ppg column
    """
    thinkplot.PrePlot(rows=3)
    for position, (name, daily) in enumerate(dailies.items(), start=1):
        thinkplot.SubPlot(position)

        # only the first panel carries the title
        heading = 'price per gram ($)' if position == 1 else ''
        thinkplot.Config(ylim=[0, 20], title=heading)
        thinkplot.Scatter(daily.ppg, s=10, label=name)

        # the third panel keeps its (rotated) x ticks; the others drop them
        if position != 3:
            thinkplot.Config(xticks=[])
        else:
            pyplot.xticks(rotation=30)

    thinkplot.Save(root='timeseries1', formats=FORMATS)

示例#19

0

显示文件

文件： timeseries.py 项目： husseingb/ThinkStats2

def MakeAcfPlot(dailies):
    """Save a two-panel figure ('timeseries9') of autocorrelation functions.

    The first panel calls PlotAutoCorrelation with add_weekly=False and the
    second with add_weekly=True; both panels use the same axis limits.

    dailies: map from category name to DataFrame of daily prices
    """
    limits = [0, 41, -0.2, 0.2]
    placement = dict(axis=limits, loc='lower right')

    # panel 1
    thinkplot.PrePlot(cols=2)
    PlotAutoCorrelation(dailies, add_weekly=False)
    thinkplot.Config(ylabel='correlation', xlabel='lag (day)', **placement)

    # panel 2
    thinkplot.SubPlot(2)
    PlotAutoCorrelation(dailies, add_weekly=True)
    thinkplot.Save(root='timeseries9', xlabel='lag (days)', **placement)

示例#20

0

显示文件

文件： analytic.py 项目： AxleMaxGit/python-data-science-projects

def MakeBabyBoom():
    """Save a two-panel figure ('analytic_interarrivals') of interarrival times.

    The first panel is the CDF on linear axes; the second is the
    complementary CDF with a log y-axis.
    """
    # interarrival times: differences between consecutive `minutes` values
    births = ReadBabyBoom()
    gaps = births.minutes.diff()
    gap_cdf = thinkstats2.Cdf(gaps, label='actual')

    # panel 1: CDF
    thinkplot.PrePlot(cols=2)
    thinkplot.Cdf(gap_cdf)
    thinkplot.Config(xlabel='minutes', ylabel='CDF', legend=False)

    # panel 2: CCDF on a log-y scale
    thinkplot.SubPlot(2)
    thinkplot.Cdf(gap_cdf, complement=True)
    ccdf_options = dict(
        xlabel='minutes',
        ylabel='CCDF',
        yscale='log',
        legend=False,
    )
    thinkplot.Config(**ccdf_options)

    thinkplot.Save(root='analytic_interarrivals', legend=False)

示例#21

0

显示文件

def PlotMarriageData(resp):
    """Save a two-row figure ('survival2') of hazard and survival functions.

    resp: DataFrame of respondents, passed to EstimateMarriageSurvival

    returns: the survival function from EstimateMarriageSurvival
    """
    hazard, survival = EstimateMarriageSurvival(resp)

    # row 1: hazard function
    thinkplot.PrePlot(rows=2)
    thinkplot.Plot(hazard)
    thinkplot.Config(ylabel='hazard', legend=False)

    # row 2: survival function
    thinkplot.SubPlot(2)
    thinkplot.Plot(survival)
    save_options = dict(
        xlabel='age (years)',
        ylabel='prob unmarried',
        ylim=[0, 1],
        legend=False,
        formats=FORMATS,
    )
    thinkplot.Save(root='survival2', **save_options)
    return survival

示例#22

0

显示文件

def PlotSurvival(complete):
    """Plot survival and hazard curves, printing a few sample values.

    Prints the CDF and survival function at 13 and the hazard function
    at 39.

    complete: list of complete lifetimes
    """
    thinkplot.PrePlot(3, rows=2)

    lifetime_cdf = thinkstats2.Cdf(complete, label='cdf')
    survival = MakeSurvivalFromCdf(lifetime_cdf, label='survival')
    for curve in (lifetime_cdf, survival):
        print(curve[13])

    # row 1: survival function with the CDF drawn faintly behind it
    thinkplot.Plot(survival)
    thinkplot.Cdf(lifetime_cdf, alpha=0.2)
    thinkplot.Config()

    # row 2: hazard function derived from the survival function
    thinkplot.SubPlot(2)
    hazard = survival.MakeHazardFunction(label='hazard')
    print(hazard[39])
    thinkplot.Plot(hazard)
    thinkplot.Config(ylim=[0, 0.75])

示例#23

0

显示文件

文件： timeseries.py 项目： crskbel-ca/PythonBayesianStatistics

def PlotRollingMean(daily, name):
    """Plots rolling mean and EWMA and saves the figure as 'timeseries10'.

    The data are reindexed onto every date between the first and last
    index value, so dates with no row become missing values.

    daily: DataFrame of daily prices with a ppg column
    name: label for the scatter points
    """
    dates = pandas.date_range(daily.index.min(), daily.index.max())
    reindexed = daily.reindex(dates)

    thinkplot.PrePlot(cols=2)
    thinkplot.Scatter(reindexed.ppg, s=15, alpha=0.1, label=name)
    # pandas.rolling_mean was removed from pandas; Series.rolling(...).mean()
    # is the equivalent call with the same 30-row window.
    roll_mean = reindexed.ppg.rolling(30).mean()
    thinkplot.Plot(roll_mean, label='rolling mean')
    pyplot.xticks(rotation=30)
    thinkplot.Config(ylabel='price per gram ($)')

    thinkplot.SubPlot(2)
    thinkplot.Scatter(reindexed.ppg, s=15, alpha=0.1, label=name)
    # pandas.ewma was removed as well; Series.ewm(...).mean() replaces it.
    ewma = reindexed.ppg.ewm(span=30).mean()
    thinkplot.Plot(ewma, label='EWMA')
    pyplot.xticks(rotation=30)
    thinkplot.Save(root='timeseries10', formats=FORMATS)

示例#24

0

显示文件

def MakeFigures(firsts, others):
    """Save figures comparing the PMFs of prglngth for two groups.

    'probability_nsfg_pmf' shows the two PMFs as bars and as step
    functions; 'probability_nsfg_diffs' shows their difference, in
    percentage points, for weeks 35 through 45.

    firsts: DataFrame with a prglngth column
    others: DataFrame with a prglngth column
    """
    first_pmf = thinkstats2.Pmf(firsts.prglngth, label='first')
    other_pmf = thinkstats2.Pmf(others.prglngth, label='other')
    bar_width = 0.45
    limits = [27, 46, 0, 0.6]

    # panel 1: bars aligned to opposite sides so they sit next to each other
    thinkplot.PrePlot(2, cols=2)
    thinkplot.Hist(first_pmf, align='right', width=bar_width)
    thinkplot.Hist(other_pmf, align='left', width=bar_width)
    thinkplot.Config(xlabel='weeks', ylabel='probability', axis=limits)

    # panel 2: step functions
    thinkplot.PrePlot(2)
    thinkplot.SubPlot(2)
    thinkplot.Pmfs([first_pmf, other_pmf])
    thinkplot.Save(
        root='probability_nsfg_pmf',
        xlabel='weeks',
        axis=[27, 46, 0, 0.6],
    )

    # difference between the PMFs, in percentage points, week by week
    weeks = range(35, 46)
    diffs = [
        100 * (first_pmf.Prob(week) - other_pmf.Prob(week))
        for week in weeks
    ]

    thinkplot.Bar(weeks, diffs)
    thinkplot.Save(
        root='probability_nsfg_diffs',
        title='Difference in PMFs',
        xlabel='weeks',
        ylabel='percentage points',
        legend=False,
    )

示例#25

0

显示文件

文件： survival.py 项目： husseingb/ThinkStats2

def PlotMarriageData():
    """Plots hazard and survival functions and saves them as 'survival2'.

    Reads the respondents with chap01ex_soln.ReadFemResp, derives the
    columns 'agemarry' and 'age', and estimates the hazard function from
    the respondents with evrmarry == 1 (complete) and evrmarry == 0
    (ongoing).
    """
    resp = chap01ex_soln.ReadFemResp()

    # Treat the codes 9997-9999 as missing.  The result is assigned back to
    # the column instead of calling replace(..., inplace=True) on
    # resp.cmmarrhx: an in-place change through attribute access is not
    # guaranteed to reach the DataFrame under pandas Copy-on-Write.
    resp['cmmarrhx'] = resp.cmmarrhx.replace([9997, 9998, 9999], np.nan)

    # presumably the cm* columns count months, so / 12.0 gives years
    # (the x-axis is labelled 'age (years)') -- TODO confirm
    resp['agemarry'] = (resp.cmmarrhx - resp.cmbirth) / 12.0
    resp['age'] = (resp.cmintvw - resp.cmbirth) / 12.0

    complete = resp[resp.evrmarry == 1].agemarry
    ongoing = resp[resp.evrmarry == 0].age

    hf = EstimateHazardFunction(complete, ongoing, label='hazard')
    sf = hf.MakeSurvival(label='survival')

    thinkplot.PrePlot(rows=2)
    thinkplot.Plot(hf)
    thinkplot.Config()

    thinkplot.SubPlot(2)
    thinkplot.Plot(sf)
    thinkplot.Save(root='survival2', xlabel='age (years)', ylim=[0, 1])

示例#26

0

显示文件

def main():
    """Update two pairs of normal PMFs, plot them and print comparisons.

    p1/p2 and q1/q2 each start as identical copies of
    MakeNormalPmf(0, 1, 3, n=101); four calls to Update then modify them.
    """
    def labelled_pair(first_label, second_label):
        """Return a fresh normal PMF and a copy of it, with the given labels."""
        original = thinkbayes2.MakeNormalPmf(0, 1, 3, n=101)
        original.label = first_label
        return original, original.Copy(label=second_label)

    p1, p2 = labelled_pair('p1', 'p2')
    q1, q2 = labelled_pair('q1', 'q2')

    # the order of these updates matters: each one feeds the next
    p1, q1 = Update(p1, q1, True)
    p1, q2 = Update(p1, q2, True)
    p2, q1 = Update(p2, q1, True)
    p2, q2 = Update(p2, q2, False)

    # row 1: the p distributions
    thinkplot.PrePlot(num=4, rows=2)
    thinkplot.Pmfs([p1, p2])
    thinkplot.Config(legend=True)

    # row 2: the q distributions
    thinkplot.SubPlot(2)
    thinkplot.Pmfs([q1, q2])
    thinkplot.Show()

    print('Prob p1 > p2', p1 > p2)
    print('Prob q1 > q2', q1 > q2)

示例#27

0

显示文件

def compareDetroitAirport(flights):
    """Create PMFs to compare the Detroit airport (DTW) with other airports.

    For each of four arrival-delay windows, saves a bar figure and a step
    figure comparing the arrival-delay PMF of flights whose destination is
    DTW with the PMF of all other flights.

    Per JD Power: Detroit Metropolitan Wayne County Airport ranks highest
    in passenger satisfaction among mega airports with a score of 786.
    https://www.jdpower.com/business/press-releases/2019-north-america-airport-satisfaction-study

    flights: DataFrame with DESTINATION_AIRPORT and ARRIVAL_DELAY columns
    """
    detroit = flights[flights.DESTINATION_AIRPORT == 'DTW']
    # The comparison group is selected on the same column as `detroit`.
    # The previous filter, AIRLINE != 'DTW', compared an airport code with
    # the airline column and therefore kept the Detroit flights as well.
    others = flights[flights.DESTINATION_AIRPORT != 'DTW']
    detroit_pmf = thinkstats2.Pmf(detroit.ARRIVAL_DELAY,
                                  label='Detroit Metro Arrival Delay')
    other_pmf = thinkstats2.Pmf(others.ARRIVAL_DELAY, label='other')
    width = 0.45

    # arrival-delay windows in minutes, as (low, high)
    windows = [(-100, 100), (-30, 30), (-60, 0), (0, 60)]

    for low, high in windows:
        span = '%d to %d' % (low, high)
        root = '%dto%dDetroitDelay' % (low, high)
        labels = dict(title='%s min Arrival Delay' % span,
                      xlabel='detroit metro arrival delay',
                      ylabel='probability %s mins' % span)

        # bar figure: bars aligned to opposite sides so they sit side by side
        thinkplot.PrePlot(2, cols=2)
        thinkplot.Hist(detroit_pmf, align='right', width=width)
        thinkplot.Hist(other_pmf, align='left', width=width)
        thinkplot.Save(root=root + 'BarPMF',
                       axis=[low, high, 0, 0.032],
                       **labels)

        # step figure
        thinkplot.PrePlot(2)
        thinkplot.SubPlot(2)
        thinkplot.Pmfs([detroit_pmf, other_pmf])
        thinkplot.Save(root=root + 'StepPMF',
                       axis=[low, high, 0, 0.032],
                       **labels)

示例#28

0

显示文件

def compareDay4(flights):
    """Create PMFs to compare Day 4 (Thursday) with the other days.

    For each of four arrival-delay windows, saves a bar figure and a step
    figure comparing the arrival-delay PMF of flights with DAY == 4 with
    the PMF of all other flights.

    Original author's note: Day 4 (Thursday) was chosen because it showed
    the most flights for that day in the scatterplot.

    flights: DataFrame with DAY and ARRIVAL_DELAY columns
    """
    labelString = "Day 4 Arrival Delay"
    xLabelString = "day 4 arrival delay"
    on_day = flights[flights.DAY == 4]
    off_day = flights[flights.DAY != 4]
    day_pmf = thinkstats2.Pmf(on_day.ARRIVAL_DELAY, label=labelString)
    other_pmf = thinkstats2.Pmf(off_day.ARRIVAL_DELAY, label='other')
    bar_width = 0.45

    # arrival-delay windows in minutes, as (low, high)
    for low, high in [(-100, 100), (-30, 30), (-60, 0), (0, 60)]:
        span = '%d to %d' % (low, high)
        root = 'Thursday%dto%dArrivalDelay' % (low, high)
        labels = dict(title='%s min Arrival Delay' % span,
                      xlabel=xLabelString,
                      ylabel='probability %s mins' % span)

        # bar figure: bars aligned to opposite sides so they sit side by side
        thinkplot.PrePlot(2, cols=2)
        thinkplot.Hist(day_pmf, align='right', width=bar_width)
        thinkplot.Hist(other_pmf, align='left', width=bar_width)
        thinkplot.Save(root=root + 'BarPMF',
                       axis=[low, high, 0, 0.032],
                       **labels)

        # step figure
        thinkplot.PrePlot(2)
        thinkplot.SubPlot(2)
        thinkplot.Pmfs([day_pmf, other_pmf])
        thinkplot.Save(root=root + 'StepPMF',
                       axis=[low, high, 0, 0.032],
                       **labels)

示例#29

0

显示文件

    def RunModel(self):
        """Shuffle self.fake_data in place and return that same object."""
        shuffled = self.fake_data
        np.random.shuffle(shuffled)
        return shuffled


## main script: runs only when the file is executed directly
if __name__ == '__main__':

    ## read csv (column 5 is parsed as dates) and group by quality and day
    transactions = pd.read_csv('mj-clean.csv', parse_dates=[5])
    dailies = GroupByQualityAndDay(transactions)

    ## plot time series by quality, one row per entry of `dailies`
    thinkplot.PrePlot(rows=3)
    for i, (name, daily) in enumerate(dailies.items()):
        thinkplot.SubPlot(i + 1)
        # only the first panel carries the title
        title = 'Price per gram ($)' if i == 0 else ''
        thinkplot.Config(ylim=[0, 20], title=title)
        thinkplot.Scatter(daily.ppg, s=10, label=name)
        if i == 2:
            # third panel: keep the x ticks, rotated by 30 degrees
            plt.xticks(rotation=30)
            thinkplot.Config()
        else:
            # other panels: no x ticks
            thinkplot.Config(xticks=[])

    plt.show()

    ## calculate linear regressions for each quality
    ## (within the visible part of the script, `model` and `results` are
    ## overwritten on every iteration and not used afterwards)

    for name, daily in dailies.items():
        model, results = RunLinearModel(daily)

示例#30

0

显示文件

文件： ch7.py 项目： smithb16/ThinkStats2

    ## make HexBin plot
    thinkplot.HexBin(heights, weights)
    thinkplot.Show(xlabel='Height (cm)',
                   ylabel='Weight (kg)',
                   axis=[140, 210, 20, 200],
                   legend=False)

    ## now use entire dataset
    heights_all, weights_all = sample.htm3, sample.wtkg2
    heights_all = Jitter(heights, 1.4)
    weights_all = Jitter(weights, 0.5)

    ## make scatter plot
    thinkplot.PrePlot(num=2, cols=2)
    thinkplot.SubPlot(1)
    thinkplot.Scatter(heights_all, weights_all, alpha=0.1, s=10)
    thinkplot.Show(xlabel='Height (cm)',
                   ylabel='Weight (kg)',
                   axis=[140, 210, 20, 200],
                   legend=False)

    thinkplot.SubPlot(2)
    thinkplot.HexBin(heights_all, weights_all)
    thinkplot.Show(xlabel='Height (cm)',
                   ylabel='Weight (kg)',
                   axis=[140, 210, 20, 200],
                   legend=False)

    ## bin data
    cleaned = df.dropna(subset=['htm3', 'wtkg2'])