Python Scatter示例，thinkplot.Scatter Python示例

示例#1

0

显示文件

def PlotQuadraticModel(daily, name):
    """
    """
    model, results = RunQuadraticModel(daily)
    regression.SummarizeResults(results)
    timeseries.PlotFittedValues(model, results, label=name)
    thinkplot.Save(root='timeseries11',
                   title='fitted values',
                   xlabel='years',
                   xlim=[-0.1, 3.8],
                   ylabel='price per gram ($)')

    timeseries.PlotResidualPercentiles(model, results)
    thinkplot.Save(root='timeseries12',
                   title='residuals',
                   xlabel='years',
                   ylabel='price per gram ($)')

    years = np.linspace(0, 5, 101)
    thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=name)
    timeseries.PlotPredictions(daily, years, func=RunQuadraticModel)
    thinkplot.Save(root='timeseries13',
                   title='predictions',
                   xlabel='years',
                   xlim=[years[0] - 0.1, years[-1] + 0.1],
                   ylabel='price per gram ($)')

示例#2

0

显示文件

文件： ch10.py 项目： smithb16/ThinkStats2

def ScatterFit(xs, ys, **options):
    inter, slope = LeastSquares(xs, ys)
    fit_xs, fit_ys = FitLine(xs, inter, slope)
    thinkplot.Scatter(xs, ys, color='blue', alpha=0.1, s=10)
    thinkplot.Plot(fit_xs, fit_ys, color='white', linewidth=3)
    thinkplot.Plot(fit_xs, fit_ys, color='red', linewidth=2)
    thinkplot.Show(legend=False, **options)

示例#3

0

显示文件

文件： Week7_chap07sol_7.1_rkarna.py 项目： rkarna/ThinkStats2

def ScatterPlot(ages, weights, alpha=1.0):
    thinkplot.Scatter(ages, weights, alpha=alpha)
    thinkplot.Config(xlabel='age (years)',
                     ylabel='weight (lbs)',
                     xlim=[10, 45],
                     ylim=[0, 15],
                     legend=False)

示例#4

0

显示文件

def PlotArrivalDepartureDelayFit(flights):
    """Plots a scatter plot and fitted curve.

    live: DataFrame
    """

    sample = thinkstats2.SampleRows(flights, 1000)
    arrivalDelays = sample.ARRIVAL_DELAY
    departureDelays = sample.DEPARTURE_DELAY
    inter, slope = thinkstats2.LeastSquares(arrivalDelays, departureDelays)
    fit_xs, fit_ys = thinkstats2.FitLine(arrivalDelays, inter, slope)

    thinkplot.Scatter(arrivalDelays, departureDelays, color='gray', alpha=0.1)
    thinkplot.Plot(fit_xs, fit_ys, color='white', linewidth=3)
    thinkplot.Plot(fit_xs, fit_ys, color='blue', linewidth=2)
    thinkplot.Save(
        root='ArrivalDepartureDelayFit_linear1',
        xlabel='arrival delay (min)',
        ylabel='departure delay (min)',
        #                   axis=[10, 45, 0, 15],
        legend=False)

    formula = 'DEPARTURE_DELAY ~ ARRIVAL_DELAY'
    model = smf.ols(formula, data=sample)
    results = model.fit()
    regression.SummarizeResults(results)

示例#5

0

显示文件

def MakeArrivalDepartureDelayScatterPlots(flights):
    """Make scatterplots.
    """
    sample = thinkstats2.SampleRows(flights, 10000)

    # simple scatter plot
    thinkplot.PrePlot(cols=2)
    #    departureDelays, arrivalDelays = GetArrivalDepartureDelay(sample)
    #    airports = sample.AIRLINE
    #   arrivalDelays = sample.ARRIVAL_DELAY
    #    ScatterPlot(airports, arrivalDelays)

    # scatter plot with jitter
    #    thinkplot.SubPlot(2)
    departureDelays, arrivalDelays = GetArrivalDepartureDelay(sample,
                                                              hjitter=1.3,
                                                              wjitter=0.5)

    thinkplot.Scatter(arrivalDelays, departureDelays, alpha=1)
    thinkplot.Config(
        xlabel='arrival delay (min)',
        ylabel='departure delay (min)',
        #                     axis=[-20, 20, 20, 200],
        legend=False)

    thinkplot.Save(root='ArrivalDepartureDelayScatterplot')

示例#6

0

显示文件

def PlotEwmaPredictions(daily, name):
    """
    """

    # use EWMA to estimate slopes
    filled = timeseries.FillMissing(daily)
    filled['slope'] = pandas.ewma(filled.ppg.diff(), span=180)
    filled[-1:]

    # extract the last inter and slope
    start = filled.index[-1]
    inter = filled.ewma[-1]
    slope = filled.slope[-1]

    # reindex the DataFrame, adding a year to the end
    dates = pandas.date_range(filled.index.min(),
                              filled.index.max() + np.timedelta64(365, 'D'))
    predicted = filled.reindex(dates)

    # generate predicted values and add them to the end
    predicted['date'] = predicted.index
    one_day = np.timedelta64(1, 'D')
    predicted['days'] = (predicted.date - start) / one_day
    predict = inter + slope * predicted.days
    predicted.ewma.fillna(predict, inplace=True)

    # plot the actual values and predictions
    thinkplot.Scatter(daily.ppg, alpha=0.1, label=name)
    thinkplot.Plot(predicted.ewma)
    thinkplot.Save()

示例#7

0

显示文件

文件： 10_HO1.py 项目： fullern1/previouscode

def PlotScatter(age, wgt, xmin, xmax, ymin, ymax):
    thinkplot.Scatter(age, wgt, alpha=1.0)
    thinkplot.Config(xlabel='Age (Years)',
                     ylabel='Birth Weight (lbs)',
                     xlim=[xmin, xmax],
                     ylim=[ymin, ymax],
                     legend=False)
    thinkplot.Show()

示例#8

0

显示文件

文件： timeseries.py 项目： husseingb/ThinkStats2

def PlotFilled(daily, name):
    """Plots the EWMA and filled data.

    daily: DataFrame of daily prices
    """
    filled = FillMissing(daily, span=30)
    thinkplot.Scatter(filled.ppg, s=15, alpha=0.3, label=name)
    thinkplot.Plot(filled.ewma, label='EWMA', alpha=0.4)
    pyplot.xticks(rotation=30)
    thinkplot.Save(root='timeseries8', ylabel='price per gram ($)')

示例#9

0

显示文件

文件： timeseries.py 项目： crskbel-ca/PythonBayesianStatistics

def PlotFittedValues(model, results, label=''):
    """Plots original data and fitted values.

    model: StatsModel model object
    results: StatsModel results object
    """
    years = model.exog[:, 1]
    values = model.endog
    thinkplot.Scatter(years, values, s=15, label=label)
    thinkplot.Plot(years, results.fittedvalues, label='model')

示例#10

0

显示文件

def PlotSimplePrediction(results, years):
    predict = GenerateSimplePrediction(results, years)

    thinkplot.Scatter(daily.years, daily.ppg, alpha=0.2, label=name)
    thinkplot.Plot(years, predict, color='#ff7f00')
    xlim = years[0] - 0.1, years[-1] + 0.1
    thinkplot.Show(title='Predictions',
                   xlabel='Years',
                   xlim=xlim,
                   ylabel='Price per gram ($)',
                   loc='upper right')

示例#11

0

显示文件

文件： timeseries.py 项目： crskbel-ca/PythonBayesianStatistics

def main(name):
    thinkstats2.RandomSeed(18)
    transactions = ReadData()

    dailies = GroupByQualityAndDay(transactions)
    PlotDailies(dailies)
    RunModels(dailies)
    PrintSerialCorrelations(dailies)
    MakeAcfPlot(dailies)

    name = 'high'
    daily = dailies[name]

    PlotLinearModel(daily, name)
    PlotRollingMean(daily, name)
    PlotFilled(daily, name)

    years = np.linspace(0, 5, 101)
    thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=name)
    PlotPredictions(daily, years)
    xlim = years[0] - 0.1, years[-1] + 0.1
    thinkplot.Save(root='timeseries4',
                   title='predictions',
                   xlabel='years',
                   xlim=xlim,
                   ylabel='price per gram ($)',
                   formats=FORMATS)

    name = 'medium'
    daily = dailies[name]

    thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=name)
    PlotIntervals(daily, years)
    PlotPredictions(daily, years)
    xlim = years[0] - 0.1, years[-1] + 0.1
    thinkplot.Save(root='timeseries5',
                   title='predictions',
                   xlabel='years',
                   xlim=xlim,
                   ylabel='price per gram ($)',
                   formats=FORMATS)

示例#12

0

显示文件

文件： timeseries.py 项目： crskbel-ca/PythonBayesianStatistics

def PlotRollingMean(daily, name):
    """Plots rolling mean and EWMA.

    daily: DataFrame of daily prices
    """
    dates = pandas.date_range(daily.index.min(), daily.index.max())
    reindexed = daily.reindex(dates)

    thinkplot.PrePlot(cols=2)
    thinkplot.Scatter(reindexed.ppg, s=15, alpha=0.1, label=name)
    roll_mean = pandas.rolling_mean(reindexed.ppg, 30)
    thinkplot.Plot(roll_mean, label='rolling mean')
    pyplot.xticks(rotation=30)
    thinkplot.Config(ylabel='price per gram ($)')

    thinkplot.SubPlot(2)
    thinkplot.Scatter(reindexed.ppg, s=15, alpha=0.1, label=name)
    ewma = pandas.ewma(reindexed.ppg, span=30)
    thinkplot.Plot(ewma, label='EWMA')
    pyplot.xticks(rotation=30)
    thinkplot.Save(root='timeseries10', formats=FORMATS)

示例#13

0

显示文件

文件： scatter.py 项目： avinashalapati09/dsc530

def ScatterPlot(heights, weights, alpha=1.0):
    """Make a scatter plot and save it.

    heights: sequence of float
    weights: sequence of float
    alpha: float
    """
    thinkplot.Scatter(heights, weights, alpha=alpha)
    thinkplot.Config(xlabel='height (cm)',
                     ylabel='weight (kg)',
                     axis=[140, 210, 20, 200],
                     legend=False)

示例#14

0

显示文件

def PlotFilled(daily, name):
    """Plot the EWMA and filled data.

    daily: DataFrame of daily prices
    name: string
    """
    filled = FillMissing(daily, span=30)
    thinkplot.Scatter(filled.ppg, s=15, alpha=0.2, label=name)
    thinkplot.Plot(filled.ewma, label='EWMA', color='#ff7f00')
    plt.xticks(rotation=30)
    thinkplot.Config(label='Price per gram ($)')
    thinkplot.Show()

示例#15

0

显示文件

文件： chap07soln.py 项目： UnderPaidMathematician/ThinkStats2

def ScatterPlot(ages, weights, alpha=1.0):
    """Make a scatter plot and save it.

    ages: sequence of float
    weights: sequence of float
    alpha: float
    """
    thinkplot.Scatter(ages, weights, alpha=alpha)
    thinkplot.Config(xlabel='age (years)',
                     ylabel='weight (lbs)',
                     xlim=[10, 45],
                     ylim=[0, 15],
                     legend=False)

示例#16

0

显示文件

文件： brfss_scatter.py 项目： Ehsan1981/ThinkStats2

def ScatterPlot(root, heights, weights, alpha=1.0):
    """Make a scatter plot and save it.

    root: string filename root
    heights: sequence of float
    weights: sequence of float
    alpha: float
    """
    thinkplot.Scatter(heights, weights, alpha=alpha)
    thinkplot.Save(root=root,
                   xlabel='Height (cm)',
                   ylabel='Weight (kg)',
                   axis=[140, 210, 20, 200],
                   legend=False)

示例#17

0

显示文件

def PlotRollingMean(daily, name):
    """Plots rolling mean.

    daily: DataFrame of daily prices
    name: string
    """
    dates = pd.date_range(daily.index.min(), daily.index.max())
    reindexed = daily.reindex(dates)

    thinkplot.Scatter(reindexed.ppg, s=15, alpha=0.2, label=name)
    roll_mean = reindexed.ppg.rolling(30).mean()
    thinkplot.Plot(roll_mean, label='rolling mean', color='#ff7f00')
    plt.xticks(rotation=30)
    thinkplot.Config(ylabel='price per gram ($)')
    thinkplot.Show()

示例#18

0

显示文件

文件： recommendations.py 项目： qrsforever/workspace

def sim_pearson(perfs, p1, p2):
    """
    皮尔逊相关系数(Pearson correlation coefficient)
    cov(X, Y) / sigmaX*sigmaY
    协方差(X,Y) / X的标准方差*Y的标准方差
    """
    shared_items = {}
    for item in perfs[p1]:
        if item in perfs[p2]:
            shared_items[item] = 1

    n = len(shared_items)

    if n == 0: return 0 

    # p1, p2共同的影评数据
    data_p1 = [perfs[p1][it] for it in shared_items]
    data_p2 = [perfs[p2][it] for it in shared_items]

    # 计算影评均值
    mu_p1 = sum(data_p1) / n
    mu_p2 = sum(data_p2) / n
    #  print(mu_p1, mu_p2)

    # 计算标准方差
    var_p1 = sum([pow(it-mu_p1, 2) for it in data_p1]) / n
    var_p2 = sum([pow(it-mu_p2, 2) for it in data_p2]) / n
    #  print(var_p1, var_p2)

    if var_p1 == 0 or var_p2 == 0: return 0

    # 计算协方差
    cov = sum([(x-mu_p1)*(y-mu_p2) for x, y in zip(data_p1, data_p2)]) / n
    #  print(cov)

    # 计算皮尔逊相关系数
    r = cov / sqrt(var_p1*var_p2)

    # ============  thinkstat 方法 ===============

    if show:
        rr = correlation.Corr(data_p1, data_p2)
        print(r, rr)
        thinkplot.Clf()
        thinkplot.Scatter(data_p1, data_p2)
        thinkplot.Show()
    
    return r

示例#19

0

显示文件

文件： correlate1.py 项目： wu12345/ThinkStats2

def main(name, data_dir='.'):
    xs, ys = ReadData(data_dir)

    thinkplot.Scatter(xs, ys, alpha=0.05)
    thinkplot.Save(root='correlate1',
                   xlabel='Age (years)',
                   ylabel='Birth weight (oz)',
                   axis=[9, 45, 0, 250])

    print 'Pearson', thinkstats2.Corr(xs, ys)
    print 'Spearman', thinkstats2.SpearmanCorr(xs, ys)

    for i in range(10):
        print SimulateNull(list(xs), list(ys))

    print PValue(xs, ys, 1000)

示例#20

0

显示文件

文件： Assignment12.1EEdmunds.py 项目： tripleee19/EDA-of-Russian-Crime-Data

def scatter(x):
    tot_crimes = df.Total_crimes
    thinkplot.Scatter(df[x], tot_crimes, alpha=.5)
    if x == 'month':
        thinkplot.Show(title="Total Crimes vs Time",
                       xlabel="Year",
                       ylabel="Total Crimes")
    else:
        thinkplot.Show(title="Total Crimes vs " + x + " Crimes",
                       xlabel=x + " Crimes",
                       ylabel="Total Crimes")
        print(x + " crime stats")
        print("Spearman's correlation:",
              thinkstats2.SpearmanCorr(tot_crimes, df[x]))
        print("Covariance:", thinkstats2.Cov(tot_crimes, df[x]))
        print()

示例#21

0

显示文件

文件： timeseries.py 项目： crskbel-ca/PythonBayesianStatistics

def PlotDailies(dailies):
    """Makes a plot with daily prices for different qualities.

    dailies: map from name to DataFrame
    """
    thinkplot.PrePlot(rows=3)
    for i, (name, daily) in enumerate(dailies.items()):
        thinkplot.SubPlot(i + 1)
        title = 'price per gram ($)' if i == 0 else ''
        thinkplot.Config(ylim=[0, 20], title=title)
        thinkplot.Scatter(daily.ppg, s=10, label=name)
        if i == 2:
            pyplot.xticks(rotation=30)
        else:
            thinkplot.Config(xticks=[])

    thinkplot.Save(root='timeseries1', formats=FORMATS)

示例#22

0

显示文件

def PlotFit(live):
    """Plots a scatter plot and fitted curve.

    live: DataFrame
    """
    ages = live.agepreg
    weights = live.totalwgt_lb
    inter, slope = thinkstats2.LeastSquares(ages, weights)
    fit_xs, fit_ys = thinkstats2.FitLine(ages, inter, slope)

    thinkplot.Scatter(ages, weights, color='gray', alpha=0.1)
    thinkplot.Plot(fit_xs, fit_ys, color='white', linewidth=3)
    thinkplot.Plot(fit_xs, fit_ys, color='blue', linewidth=2)
    thinkplot.Save(root='linear1',
                   xlabel='age (years)',
                   ylabel='birth weight (lbs)',
                   axis=[10, 45, 0, 15],
                   legend=False)

示例#23

0

显示文件

文件： agemodel.py 项目： wu12345/ThinkStats2

def MakeFigures(pool, firsts, others):
    """Creates several figures for the book."""

    # CDF of all ages
    thinkplot.Clf()
    thinkplot.Cdf(pool.age_cdf)
    thinkplot.Save(root='agemodel_age_cdf',
                title="Distribution of mother's age",
                xlabel='age (years)',
                ylabel='CDF',
                legend=False)

    # CDF of all weights
    thinkplot.Clf()
    thinkplot.Cdf(pool.weight_cdf)
    thinkplot.Save(root='agemodel_weight_cdf',
                title="Distribution of birth weight",
                xlabel='birth weight (oz)',
                ylabel='CDF',
                legend=False)

    # plot CDFs of birth ages for first babies and others
    thinkplot.Clf()
    thinkplot.Cdfs([firsts.age_cdf, others.age_cdf])
    thinkplot.Save(root='agemodel_age_cdfs',
                title="Distribution of mother's age",
                xlabel='age (years)',
                ylabel='CDF')

    thinkplot.Clf()
    thinkplot.Cdfs([firsts.weight_cdf, others.weight_cdf])
    thinkplot.Save(root='agemodel_weight_cdfs',
                title="Distribution of birth weight",
                xlabel='birth weight (oz)',
                ylabel='CDF')

    # make a scatterplot of ages and weights
    ages, weights = GetAgeWeight(pool)
    thinkplot.clf()
    thinkplot.Scatter(ages, weights, alpha=0.2)
    thinkplot.Save(root='agemodel_scatter',
                xlabel='Age (years)',
                ylabel='Birth weight (oz)',
                legend=False)

示例#24

0

显示文件

def main():
    random.seed(17)

    rho = -0.8
    res = CorrelatedGenerator(1000, rho)
    xs, ys = zip(*res)

    a = 1.0
    b = 0.0
    xs = [a * x + b for x in xs]

    print 'mean, var of x', thinkstats2.MeanVar(xs)
    print 'mean, var of y', thinkstats2.MeanVar(ys)
    print 'covariance', thinkstats2.Cov(xs, ys)
    print 'Pearson corr', thinkstats2.Corr(xs, ys)
    print 'Spearman corr', thinkstats2.SpearmanCorr(xs, ys)

    thinkplot.Scatter(xs, ys)
    thinkplot.Show()

示例#25

0

显示文件

def main():
    random.seed(17)

    rho = 0.8
    xs, ys = SatIqData(1000, rho)
    print 'mean, var of x', thinkstats2.MeanVar(xs)
    print 'mean, var of y', thinkstats2.MeanVar(ys)
    print 'Pearson corr', thinkstats2.Corr(xs, ys)

    inter, slope = thinkstats2.LeastSquares(xs, ys)
    print 'inter', inter
    print 'slope', slope

    fxs, fys = thinkstats2.FitLine(xs, inter, slope)
    res = thinkstats2.Residuals(xs, ys, inter, slope)
    R2 = thinkstats2.CoefDetermination(ys, res)
    print 'R2', R2

    thinkplot.Plot(fxs, fys, color='gray', alpha=0.2)
    thinkplot.Scatter(xs, ys)
    thinkplot.Show()

示例#26

0

显示文件

文件： Week10_chap12sol_12.1n12.2_rkarna.py 项目： rkarna/ThinkStats2

def PlotQuadraticModel(daily, name):
    model, results = RunQuadraticModel(daily)
    regression.SummarizeResults(results)
    timeseries.PlotFittedValues(model, results, label=name)
    thinkplot.Save(root='Output_Timeseries1',
                   title='Fitted Val',
                   xlabel='yr',
                   xlim=[-0.2, 4],
                   ylabel='price per gram ($)')

    timeseries.PlotResidualPercentiles(model, results)
    thinkplot.Save(root='Output_Timeseries2',
                   title='Residual',
                   xlabel='yr',
                   ylabel='price per gram ($)')

    years = np.linspace(0, 10, 200)
    thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=name)
    timeseries.PlotPredictions(daily, years, func=RunQuadraticModel)
    thinkplot.Save(root='Output_Timeseries3',
                   title='Predict',
                   xlabel='yr',
                   xlim=[years[0]-0.1, years[-1]+0.1],
                   ylabel='price per gram ($)')

示例#27

0

显示文件

#%%
# plot fitted values
timeseries.PlotFittedValues(model, results, label=name)
thinkplot.Config(title='Fitted Values',
                 xlabel='years',
                 xlim=[-0.1, 3.8],
                 ylabel='price ($)/gram')

#%%
# plot predictions

# set linear spacing of years
years = np.linspace(0, 5, 101)

thinkplot.Scatter(daily.years, daily.ppg, alpha=0.1, label=name)
timeseries.PlotPredictions(daily, years, func=RunQuadraticModel)

thinkplot.Config(title='predictions',
                 xlabel='years',
                 xlim=[years[0] - 0.1, years[-1] + 0.1],
                 ylabel='price ($)/gram')

#%% [markdown]
# ### Exercise 12.2
# Write a definition for a class named `SerialCorrelationTest` that extends `HypothesisTest` from Section 9.2. It should take a series and a lag as data, compute the serial correlation of the series with the given lag, and then compute the p-value of the observed correlation.
#
# Use this class to test whether the serial correlation in raw price data is statistically significant. Also test the residuals of the linear model and (if you did the previous exercise), the quadratic model.


#%%

示例#28

0

显示文件

cdf = thinkstats2.Cdf(df.Age)
thinkplot.Cdf(cdf)
thinkplot.Config(xlabel='Age', ylabel='CDF')

#plot normal distribution
mean = df.Age.mean()
std = df.Age.std()
xs = [-4, 4]
fxs, fys = thinkstats2.FitLine(xs, inter=mean, slope=std)
thinkplot.Plot(fxs, fys, color='gray', label='model')
xs, ys = thinkstats2.NormalProbability(df.Age)
thinkplot.Plot(xs, ys, label='Age')

#scatter plots and correlation
#year vs. age
year = thinkstats2.Jitter(df.Year, .25)
thinkplot.Scatter(year, df.Age)
thinkplot.Show(xlabel='Year', ylabel='Age')
thinkstats2.Corr(df.Year, df.Age)
#drug vs. age
thinkplot.Scatter(df.Age, df.Drug)
thinkplot.Show(xlabel='Age', ylabel='Drug')

#testing a difference in gender
data = male.Age.values, female.Age.values
ht = DiffMeansPermute(data)
pvalue = ht.PValue()
print(pvalue)
ht.PlotCdf()
thinkplot.Config(xlabel='test statistic', ylabel='CDF')

示例#29

0

显示文件

# imports
from __future__ import print_function, division
%matplotlib inline
import numpy as np
import thinkstats2
import thinkplot

# get data
import first
live, firsts, others = first.MakeFrames()
live = live.dropna(subset=['agepreg', 'totalwgt_lb'])
ages = live.agepreg
weights = live.totalwgt_lb

# make a scatter plot of birth weight versus mother’s age
thinkplot.Scatter(ages, weights, alpha=1, s=10)
thinkplot.Config(xlabel='Age (years)',
                 ylabel='Weight (lbs)',
                 xlim=[10, 45],
                 ylim=[0, 15],
                 legend=False)
# RESULTS: messy plot

# Plot percentiles of birth weight versus mother’s age
bins = np.arange(10, 45, 5)
indices = np.digitize(live.agepreg, bins)
groups = live.groupby(indices)
# binned mother's age

mean_ages = [group.agepreg.mean() for i, group in groups]
cdfs_wgt= [thinkstats2.Cdf(group.totalwgt_lb) for i, group in groups]

示例#30

0

显示文件

文件： Mayasa_Dablan_cs240_project.py 项目： MayasaD/cs-project-

        meanx = np.mean(xs)
    if meany is None:
        meany = np.mean(ys)

    cov = np.dot(xs-meanx, ys-meany) / len(xs)
    return cov


# In[65]:

def Jitter(values, jitter=0.5): # the Jitter is plotted using the Jitter module and scatter
    n = len(values)           
    return np.random.normal(0, jitter, n) + values
heights = Jitter(wins, 1.4)
weights = Jitter(runs, 0.5)
thinkplot.Scatter(heights, weights, alpha=0.9, s=40,color='blue')
thinkplot.Config(xlabel='Wins',
                 ylabel='Runs',
                 axis=[0, 210, 20, 200],
                 legend=False)


# In[66]:

# In the Fifth part 
# the covariance is needed in order to compute the correlation
Cov(wins_sample, runs_sample)


# In[67]: