示例#1
0
def PlotMarathon(data):
    """Plots the marathon data together with a least squares projection.

    Prints the number of points and the raw coordinates, fits a line to
    the points from index 29 onward, and saves a figure containing those
    points, the fitted line and a constant-slope target line.

    data: sized sequence of (x, y) pairs; judging by the tick values and
          the 'mph' axis label these are presumably (year, record speed
          in mph) -- confirm against the caller
    """
    print(len(data))
    xs, ys = zip(*data)
    print('%s %s' % (xs, ys))

    # Only the points from index n onward take part in the fit and the
    # scatter plot; the first n points are left out.
    n = 29
    fit_xs, fit_ys = xs[n:], ys[n:]

    inter, slope = correlation.LeastSquares(fit_xs, fit_ys)
    print('(inter, slope): %s %s' % (inter, slope))

    end = 2043
    # Presumably the speed (mph) of a two hour marathon: 26.21875 miles
    # in 2 hours -- TODO confirm.
    target = 26.21875 / 2

    # Target line: intercept `target`, slope 0, drawn in red.
    lxs, lys = MakeLine(target, 0, [1965, end])
    pyplot.plot(lxs, lys, 'r')

    # Fitted line over the same x range, drawn in blue.
    lxs, lys = MakeLine(inter, slope, [1965, end])
    pyplot.plot(lxs, lys, 'b')

    pyplot.plot(fit_xs, fit_ys, 'bo')

    xticks = range(1980, 2060, 20)
    pyplot.xticks(xticks)

    myplot.Save(root='world_record_predict',
                title='Marathon record projection',
                ylabel='mph',
                formats=['pdf', 'eps', 'png'])
def NormalPlot(ys, label, color='b', jitter=0.0, **line_options):
    """Draws a normal probability plot of ys with a fitted reference line.

    Sorted draws from a standard normal are paired with the sorted,
    optionally jittered, values.  A least squares line through the pairs
    is drawn first, followed by the points themselves.

    Args:
        ys: sequence of values
        label: string label for the plotted points
        color: color string used for both the line and the points
        jitter: float; each y is shifted by a uniform random amount
            between -jitter and +jitter
        line_options: extra keyword options forwarded to pyplot.plot
            when drawing the points
    """
    count = len(ys)

    # Draw the normal sample before the jitter so the random stream is
    # consumed in the same order every time.
    normal_sample = sorted(random.gauss(0.0, 1.0) for _ in range(count))
    jittered = sorted(y + random.uniform(-jitter, +jitter) for y in ys)

    inter, slope = correlation.LeastSquares(normal_sample, jittered)
    fitted = correlation.FitLine(normal_sample, inter, slope)
    pyplot.plot(*fitted, color=color, linewidth=0.5, alpha=0.5)

    pyplot.plot(normal_sample, jittered,
                color=color,
                marker='.',
                label=label,
                markersize=3,
                alpha=0.1,
                **line_options)
示例#3
0
def Pvalue(filename='heri.0', delta=0.033, n=100000):
    """Estimates a p-value for a fitted slope by shuffling the ys.

    Reads the data, then n times shuffles the ys, refits a least squares
    line and counts how often the magnitude of the slope exceeds delta.

    filename: passed to ReadData, whose result must unzip into an xs
              column and a ys column
    delta: slope magnitude that a shuffled fit has to exceed (strictly)
    n: number of shuffles to run; must be non-zero

    returns: fraction of the n shuffles whose |slope| exceeded delta
    """
    data = ReadData(filename)

    # The columns never change, so split them once rather than on every
    # iteration.
    xs, ys = zip(*data)

    count = 0
    for _ in range(n):
        # Shuffle a fresh copy so each permutation starts from the
        # original ordering.
        shuffled = list(ys)
        random.shuffle(shuffled)
        _inter, slope = correlation.LeastSquares(xs, shuffled)

        if abs(slope) > delta:
            count += 1
    return float(count) / n
示例#4
0
def FitCdf(cdf):
    """Fits a line to the log of the complementary CDF.

    The first and last points are left out of the fit; the last
    complementary probability would otherwise be passed to math.log
    as 1 - p for the final p.

    cdf: Cdf of RDT; must provide xs and ps attributes

    returns: the negated slope of the fitted line
    """
    all_xs = cdf.xs
    complements = [1 - prob for prob in cdf.ps]

    interior_xs = all_xs[1:-1]
    log_complements = list(map(math.log, complements[1:-1]))

    _, gradient = correlation.LeastSquares(interior_xs, log_complements)
    return -gradient
示例#5
0
    def testLeastSquares(self):
        """Checks LeastSquares, Residuals and CoefDetermination on 3 points."""
        xs = [1, 2, 3]
        ys = [3, 6, 8]
        inter, slope = correlation.LeastSquares(xs, ys)
        # assertAlmostEqual replaces the deprecated assertAlmostEquals
        # alias, which newer unittest versions no longer provide.
        self.assertAlmostEqual(inter, 0.66666666)
        self.assertAlmostEqual(slope, 2.5)

        res = correlation.Residuals(xs, ys, inter, slope)
        for got, exp in zip(res, [-0.166666666, 0.33333333, -0.16666666666]):
            self.assertAlmostEqual(got, exp)

        R2 = correlation.CoefDetermination(ys, res)
        self.assertAlmostEqual(R2, 0.986842105263)
示例#6
0
def Fit(xs, ys):
    """Find the linear least squares fit between xs and ys.

    Prints the intercept, slope and R^2 of the fit, followed by a blank
    line.

    xs: sequence of x values
    ys: sequence of y values

    returns: tuple (inter, slope, R2)
    """
    inter, slope = correlation.LeastSquares(xs, ys)
    # Single-argument print calls give the same output under both the
    # Python 2 print statement and the Python 3 print function.
    print('(inter, slope): %s %s' % (inter, slope))

    res = correlation.Residuals(xs, ys, inter, slope)
    R2 = correlation.CoefDetermination(ys, res)

    print('inter %s' % (inter,))
    print('slope %s' % (slope,))
    print('R^2 %s' % (R2,))
    print('')

    return inter, slope, R2
示例#7
0
def Fit(halfs, fulls, half_time='1:34:05'):
    """Find the linear least squares fit between halfs and fulls.

    Prints the intercept, slope and R^2 of the fit, a blank line, and
    then the value the fitted line gives for half_time.

    halfs: sequence of x values for the fit
    fulls: sequence of y values for the fit
    half_time: time string handed to ConvertTimeToMinutes to obtain the
        x at which the prediction is evaluated (presumably a half
        marathon time written h:mm:ss -- confirm against
        ConvertTimeToMinutes)

    returns: tuple (inter, slope, R2)
    """
    inter, slope = correlation.LeastSquares(halfs, fulls)
    # Single-argument print calls give the same output under both the
    # Python 2 print statement and the Python 3 print function.
    print('(inter, slope): %s %s' % (inter, slope))

    res = correlation.Residuals(halfs, fulls, inter, slope)
    R2 = correlation.CoefDetermination(fulls, res)

    print('inter %s' % (inter,))
    print('slope %s' % (slope,))
    print('R^2 %s' % (R2,))
    print('')

    prediction = inter + slope * ConvertTimeToMinutes(half_time)
    print('prediction %s' % (prediction,))

    return inter, slope, R2
示例#8
0
def FitLine(xs, ys, fxs):
    """Fits ys against log(xs) and evaluates the fitted line at fxs.

    Both xs and fxs are passed through math.log before being used, so
    the returned values are inter + slope * log(fx).

    xs: diameter in cm
    ys: age in years
    fxs: diameter in cm

    returns: list of fitted values, one per element of fxs
    """
    log_xs = list(map(math.log, xs))
    intercept, gradient = correlation.LeastSquares(log_xs, ys)

    log_fxs = list(map(math.log, fxs))
    return [intercept + gradient * log_fx for log_fx in log_fxs]
示例#9
0
def ComputeLeastSquares(ages, weights):
    """Computes least squares fit for ages and weights.

    Prints summary statistics: the Pearson and Spearman correlations,
    the fitted intercept and slope, and R^2, followed by a blank line.

    ages: sequence of x values
    weights: sequence of y values

    returns: tuple (inter, slope, R2)
    """
    # compute the correlation between age and weight
    # (single-argument print calls give the same output under both the
    # Python 2 print statement and the Python 3 print function)
    print('Pearson correlation %s' % (correlation.Corr(ages, weights),))
    print('Spearman correlation %s'
          % (correlation.SpearmanCorr(ages, weights),))

    # compute least squares fit
    inter, slope = correlation.LeastSquares(ages, weights)
    print('(inter, slope): %s %s' % (inter, slope))

    res = correlation.Residuals(ages, weights, inter, slope)
    R2 = correlation.CoefDetermination(weights, res)

    print('R^2 %s' % (R2,))
    print('')
    return inter, slope, R2
示例#10
0
def ComputeCorrelations():
    """Prints correlation and least squares statistics for height/weight.

    Loads the respondents via brfss_scatter, then reports the Pearson
    correlation of height with weight and with Log(weights), the
    Spearman correlation of height with weight, and the least squares
    fit of Log(weights) on height together with its R^2 and sqrt(R^2).
    """
    respondents = brfss_scatter.Respondents()
    respondents.ReadRecords()
    print('Number of records:', len(respondents.records))

    heights, weights = respondents.GetHeightWeight()

    # Pearson correlation on the raw weights.
    r_raw = correlation.Corr(heights, weights)
    print('Pearson correlation (weights):', r_raw)

    # Pearson correlation after transforming the weights with Log.
    transformed = Log(weights)
    r_log = correlation.Corr(heights, transformed)
    print('Pearson correlation (log weights):', r_log)

    rank_corr = correlation.SpearmanCorr(heights, weights)
    print('Spearman correlation (weights):', rank_corr)

    # Least squares fit of the transformed weights on height.
    intercept, gradient = correlation.LeastSquares(heights, transformed)
    print('Least squares inter, slope (log weights):', intercept, gradient)

    residuals = correlation.Residuals(heights, transformed,
                                      intercept, gradient)
    r_squared = correlation.CoefDetermination(transformed, residuals)
    print('Coefficient of determination:', r_squared)
    print('sqrt(R^2):', math.sqrt(r_squared))