def PlotConfidenceIntervals(xs, inters, slopes, res=None, percent=90, **options):
    """Shades a percentile band computed from a family of fitted lines.

    Each (intercept, slope) pair is evaluated with thinkstats2.FitLine.
    When residuals are supplied, a random permutation of them is added to
    the fitted values of every line.  The band between the lower and upper
    percentile rows is drawn with thinkplot.FillBetween.

    xs: sequence of x values handed to thinkstats2.FitLine
    inters: estimated intercepts
    slopes: estimated slopes, paired element-wise with inters
    res: residuals, or None to plot the fitted lines without noise
    percent: what central percentile range to show (default 90)
    options: keyword arguments forwarded to thinkplot.FillBetween
    """
    add_noise = res is not None

    fitted_rows = []
    for intercept, gradient in zip(inters, slopes):
        # fxs from the last fit is what ends up on the x axis
        fxs, fitted = thinkstats2.FitLine(xs, intercept, gradient)
        if add_noise:
            fitted += np.random.permutation(res)
        fitted_rows.append(fitted)

    # split the excluded probability mass evenly between the two tails
    tail = (100 - percent) / 2
    lower, upper = thinkstats2.PercentileRows(fitted_rows, (tail, 100 - tail))
    thinkplot.FillBetween(fxs, lower, upper, **options)
def ResampleSurvival(resp, iters=101):
    """Plots an estimated survival function with a resampled 90% band.

    The survival function estimated from resp is plotted first.  Then resp
    is resampled iters times with thinkstats2.ResampleRowsWeighted; each
    resample's survival function is evaluated on a common grid, and the
    band between the 5th and 95th percentile rows is shaded.  The figure is
    saved via thinkplot.Save with root 'survival3'.

    resp: DataFrame of respondents
    iters: number of resamples
    """
    _, estimate = EstimateMarriageSurvival(resp)
    thinkplot.Plot(estimate)

    # Evaluation grid with a step of 1/12 (one month, if ages are in years
    # as the x label suggests).
    # NOTE(review): min()/max() are taken over resp itself, not a single
    # column -- confirm resp is something np.arange accepts bounds from.
    ts = np.arange(resp.min(), resp.max(), 1/12.0)

    ss_seq = []
    for _ in range(iters):
        resampled = thinkstats2.ResampleRowsWeighted(resp)
        _, resampled_sf = EstimateMarriageSurvival(resampled)
        ss_seq.append(resampled_sf.Probs(ts))

    ci_low, ci_high = thinkstats2.PercentileRows(ss_seq, [5, 95])
    thinkplot.FillBetween(ts, ci_low, ci_high, color='gray', label='90% CI')
    thinkplot.Save(
        root='survival3',
        xlabel='age (years)',
        ylabel='prob unmarried',
        xlim=[12, 46],
        ylim=[0, 1],
        formats=FORMATS,
    )
def PlotSurvivalFunctions(sf_map, predict_flag=False):
    """Plots a 10-90 percentile band, and optionally the median, per group.

    Groups are visited in descending order of name.  For each group the
    band between the 10th and 90th percentile rows from MakeSurvivalCI is
    shaded gray; unless predict_flag is set, the 50th percentile row is
    also drawn as a line labelled '19<name>'.

    sf_map: map from group name to sequence of survival functions
    predict_flag: whether the lines are predicted or actual; when true,
                  only the shaded bands are drawn
    """
    thinkplot.PrePlot(len(sf_map))

    groups = sorted(sf_map.items(), reverse=True)
    for name, sf_seq in groups:
        # nothing to draw for a group with no survival functions, or whose
        # first survival function is empty
        if len(sf_seq) == 0 or len(sf_seq[0]) == 0:
            continue

        ts, rows = MakeSurvivalCI(sf_seq, [10, 50, 90])
        thinkplot.FillBetween(ts, rows[0], rows[2], color='gray')

        if predict_flag:
            continue
        thinkplot.Plot(ts, rows[1], label='19%d' % name)

    thinkplot.Config(
        xlabel='age (years)',
        ylabel='prob unmarried',
        xlim=[14, 45],
        ylim=[0, 1],
        legend=True,
        loc='upper right',
    )
def SimulateAutocorrelation(daily, iters=1001, nlags=40):
    """Resample residuals, compute autocorrelation, and plot percentiles.

    Each simulation fills the series with FillMissing, resamples its
    residuals, and records the absolute autocorrelation for every lag after
    lag 0.  The 97.5th percentile row of the column-sorted results, and its
    negation, bound the gray band that is plotted.

    daily: data passed to FillMissing (with span=30)
    iters: number of simulations to run
    nlags: passed to smtsa.acf as nlags; the band is plotted for lags
           1..nlags
    """
    # run simulations
    abs_acfs = []
    for _ in range(iters):
        filled = FillMissing(daily, span=30)
        shuffled = thinkstats2.Resample(filled.resid)
        # [1:] drops the first entry so the rest line up with lags 1..nlags
        acf = smtsa.acf(shuffled, nlags=nlags, unbiased=True)[1:]
        abs_acfs.append(np.abs(acf))

    # one row per simulation, then sort each column independently
    array = np.zeros((iters, len(acf)))
    for row, values in enumerate(abs_acfs):
        array[row, :] = values
    array = np.sort(array, axis=0)

    # the values are absolute, so the upper bound mirrored about zero
    # gives the lower bound
    upper = PercentileRow(array, 97.5)
    lower = -upper
    thinkplot.FillBetween(range(1, nlags + 1), lower, upper,
                          alpha=0.2, color='gray')
def PlotSurvivalFunctions(sf_map, predict_flag=False, colormap=None):
    """Plots a 10-90 percentile band, and optionally the median, per group.

    Groups are visited in descending order of name.  For each group the
    band between the 10th and 90th percentile rows from MakeSurvivalCI is
    shaded; unless predict_flag is set, the 50th percentile row is also
    drawn as a line labelled '<name>s'.

    sf_map: map from group name to sequence of survival functions
    predict_flag: whether the lines are predicted or actual; when true,
                  only the shaded bands are drawn
    colormap: map from group name to color; when empty or None the line
              color is left to thinkplot
    """
    thinkplot.PrePlot(num=len(sf_map))

    groups = sorted(sf_map.items(), reverse=True)
    for name, sf_seq in groups:
        # nothing to draw for a group with no survival functions, or whose
        # first survival function is empty
        if len(sf_seq) == 0 or len(sf_seq[0]) == 0:
            continue

        ts, rows = MakeSurvivalCI(sf_seq, [10, 50, 90])
        thinkplot.FillBetween(ts, rows[0], rows[2], color='gray', alpha=0.2)

        if predict_flag:
            continue

        # only pass an explicit color when a colormap was supplied
        plot_options = {'color': colormap[name]} if colormap else {}
        thinkplot.Plot(ts, rows[1], label='%ds' % name, **plot_options)
def PlotConfidenceIntervals(xs, inters, slopes, percent=90, **options):
    """Shades a percentile band computed from a family of fitted lines.

    Each (intercept, slope) pair is evaluated with thinkstats2.FitLine and
    the band between the lower and upper percentile rows of the fitted
    values is drawn with thinkplot.FillBetween.

    xs: sequence of x values handed to thinkstats2.FitLine
    inters: estimated intercepts
    slopes: estimated slopes, paired element-wise with inters
    percent: what central percentile range to show (default 90)
    options: keyword arguments forwarded to thinkplot.FillBetween
    """
    # split the excluded probability mass evenly between the two tails
    tail = (100 - percent) / 2
    bounds = tail, 100 - tail

    fitted_rows = []
    for intercept, gradient in zip(inters, slopes):
        # fxs from the last fit is what ends up on the x axis
        fxs, fitted = thinkstats2.FitLine(xs, intercept, gradient)
        fitted_rows.append(fitted)

    lower, upper = thinkstats2.PercentileRows(fitted_rows, bounds)
    thinkplot.FillBetween(fxs, lower, upper, **options)
def PlotPredictions(daily, years, iters=101, percent=90, func=RunLinearModel):
    """Plots two nested gray percentile bands of simulated predictions.

    Results from SimulateResults are turned into predictions twice: first
    with add_resid=True (drawn with alpha 0.3), then with add_resid=False
    (drawn with alpha 0.5).  Each band spans the same percentile range.

    daily: DataFrame of daily prices
    years: sequence of times (in years) to make predictions for
    iters: number of simulations
    percent: what percentile range to show
    func: function that fits a model to the data
    """
    result_seq = SimulateResults(daily, iters=iters, func=func)

    # split the excluded probability mass evenly between the two tails
    tail = (100 - percent) / 2
    percents = tail, 100 - tail

    # (add_resid, alpha) for each band, in drawing order
    bands = ((True, 0.3), (False, 0.5))
    for add_resid, alpha in bands:
        predict_seq = GeneratePredictions(result_seq, years,
                                          add_resid=add_resid)
        low, high = thinkstats2.PercentileRows(predict_seq, percents)
        thinkplot.FillBetween(years, low, high, alpha=alpha, color='gray')
def PlotIntervals(daily, years, iters=101, percent=90, func=RunLinearModel):
    """Plots a percentile band of predictions based on different intervals.

    Results come from SimulateIntervals; predictions are generated with
    add_resid=True and the band between the lower and upper percentile rows
    is shaded in light gray.

    daily: DataFrame of daily prices
    years: sequence of times (in years) to make predictions for
    iters: number of simulations
    percent: what percentile range to show
    func: function that fits a model to the data
    """
    result_seq = SimulateIntervals(daily, iters=iters, func=func)
    predictions = GeneratePredictions(result_seq, years, add_resid=True)

    # split the excluded probability mass evenly between the two tails
    tail = (100 - percent) / 2
    bounds = [PercentileRow(predictions, q) for q in (tail, 100 - tail)]

    thinkplot.FillBetween(years, bounds[0], bounds[1],
                          alpha=0.1, color='gray')
def SimulateAutocorrelation(daily, iters=1001, nlags=40):
    """Resample residuals, compute autocorrelation, and plot percentiles

    Each simulation fills the series with FillMissing, resamples its
    residuals, and records the absolute autocorrelation for every lag after
    lag 0.  The 97.5th percentile row across simulations, and its negation,
    bound the gray band that is plotted.

    daily: DataFrame
    iters: number of simulations to run
    nlags: maximum lags to compute autocorrelation
    """
    def simulate_once():
        """Returns absolute autocorrelations of one resampled residual series."""
        filled = FillMissing(daily, span=30)
        shuffled = thinkstats2.Resample(filled.resid)
        # NOTE(review): newer statsmodels releases spell this keyword
        # `adjusted` -- confirm against the installed version.
        # [1:] drops the first entry so the rest line up with lags 1..nlags
        acf = smtsa.acf(shuffled, nlags=nlags, unbiased=True)[1:]
        return np.abs(acf)

    abs_acfs = [simulate_once() for _ in range(iters)]

    # the values are absolute, so the upper bound mirrored about zero
    # gives the lower bound
    upper = thinkstats2.PercentileRows(abs_acfs, [97.5])[0]
    lower = -upper
    thinkplot.FillBetween(range(1, nlags + 1), lower, upper,
                          alpha=0.2, color='gray')
def ResampleSurvival(dados, limiar, iters=101):
    """Plots an estimated survival function with a resampled 90% band.

    The survival function estimated from dados is plotted first.  Then
    dados is wrapped in a DataFrame and resampled iters times with
    thinkstats2.ResampleRowsWeighted (column 'MANSO'); each resample's
    survival function is evaluated on a common grid, and the band between
    the 5th and 95th percentile rows is shaded.

    dados: data passed to EstimateMarriageSurvival; its min() and max()
           bound the evaluation grid
    limiar: second argument forwarded to EstimateMarriageSurvival
            (presumably a threshold -- confirm against that function)
    iters: number of resamples
    """
    _, estimate = EstimateMarriageSurvival(dados, limiar)
    thinkplot.Plot(estimate)

    # evaluation grid in unit steps over the observed range
    ts = np.arange(dados.min(), dados.max(), 1)

    ss_seq = []
    for _ in range(iters):
        resampled = thinkstats2.ResampleRowsWeighted(pd.DataFrame(dados),
                                                     column='MANSO')
        _, resampled_sf = EstimateMarriageSurvival(resampled['MANSO'], limiar)
        ss_seq.append(resampled_sf.Probs(ts))

    ci_low, ci_high = thinkstats2.PercentileRows(ss_seq, [5, 95])
    thinkplot.FillBetween(ts, ci_low, ci_high, color='gray', label='90% CI')
def PlotConfidenceIntervals(xs, inters, slopes, res=None, percent=90, **options):
    """Shades a percentile band computed from a family of fitted lines.

    Each (intercept, slope) pair is evaluated with thinkstats2.FitLine and
    stored as one row of an array.  When residuals are supplied, a random
    permutation of them is added to each row.  The columns are then sorted
    independently and the rows at the lower and upper percentiles bound the
    band drawn with thinkplot.FillBetween.

    xs: sequence of x values handed to thinkstats2.FitLine
    inters: estimated intercepts
    slopes: estimated slopes, paired element-wise with inters
    res: residuals, or None to plot the fitted lines without noise
    percent: what central percentile range to show (default 90)
    options: keyword arguments forwarded to thinkplot.FillBetween
    """
    n_rows = len(slopes)
    array = np.zeros((n_rows, len(xs)))

    for i, (inter, slope) in enumerate(zip(inters, slopes)):
        # fxs from the last fit is what ends up on the x axis
        fxs, fys = thinkstats2.FitLine(xs, inter, slope)
        if res is not None:
            fys += np.random.permutation(res)
        array[i, ] = fys

    # sort each column so row k holds the k-th smallest value at every x
    array = np.sort(array, axis=0)

    def Percentile(p):
        """Selects the line from array that corresponds to percentile p.

        p: float 0--100

        returns: NumPy array (one row)
        """
        # p == 100 would otherwise index one past the last row, so clamp
        index = min(int(n_rows * p / 100), n_rows - 1)
        return array[index, ]

    # split the excluded probability mass evenly between the two tails
    p = (100 - percent) / 2
    low = Percentile(p)
    high = Percentile(100 - p)
    thinkplot.FillBetween(fxs, low, high, **options)