示例#1
0
def probe(filename, column, what):

    deprivation = getDeprivation(filename, column, what)

    print what
    print '  Highest'
    for k in sorted(rate.keys(), key=lambda k: deprivation[k],
                    reverse=True)[:10]:
        print '    {:32s}: {:.2f}'.format(codes[k], deprivation[k])
    print '  Lowest'
    for k in sorted(rate.keys(), key=lambda k: deprivation[k],
                    reverse=False)[:10]:
        print '    {:32s}: {:.2f}'.format(codes[k], deprivation[k])

    y = np.array([100.0 * rate[k] for k in rate.keys()])
    x = np.array([deprivation[k] for k in rate.keys()])

    c = [
        UKCovid19Data.colorsByRegion[UKCovid19Data.whichRegion(k)]
        for k in rate.keys()
    ]

    fig = plt.figure(figsize=(8, 6))
    plt.scatter(x, y, color=c, alpha=0.8)
    r = scipy.stats.linregress(x, y)
    gradient, intercept, r_value, p_value, std_err = r

    rx = np.linspace(min(x), max(x), 100)
    ry = gradient * rx + intercept
    plt.plot(rx, ry, color='tab:red', label='Linear regression')

    coef = np.polyfit(x, y, 2)
    qy = coef[2] + coef[1] * rx + coef[0] * rx**2
    plt.plot(rx, qy, color='tab:green', label='Quadratic best fit')

    ax = plt.gca()
    vals = ax.get_yticks()
    ax.set_yticklabels(['{:,.1f}%'.format(x) for x in vals])

    plt.ylabel('Daily % increase rate\n({} to {})'.format(dates[0], dates[-1]))
    plt.xlabel(what)

    plt.title('Deprivation: {}\nr={:.3f}'.format(what, r_value))

    regionsUsed = sorted(
        list(set([UKCovid19Data.whichRegion(k) for k in rate.keys()])))
    handles, labels = ax.get_legend_handles_labels()
    handles.extend([
        matplotlib.patches.Patch(color=UKCovid19Data.colorsByRegion[k],
                                 label=k) for k in regionsUsed
    ])
    plt.legend(handles=handles, loc='upper left', prop={'size': 6})

    distutils.dir_util.mkpath('output')
    plt.savefig('output/deprivation-{}.png'.format(
        filename.replace('(%)', 'percentage')),
                dpi=96)

    correlation[what] = r_value
示例#2
0
def plot(x,y,c,w,s,interesting):

    fig=plt.figure(figsize=(8,6))

    plt.scatter(x,y,s=s,color=c,alpha=0.8)
    
    # Unweighted regression line
    r=scipy.stats.linregress(x,y)
    gradient,intercept,r_value,p_value,std_err=r
    print 'Unweighted',gradient,intercept
    
    rx=np.linspace(min(x),max(x),100)
    ry=gradient*rx+intercept
    plt.plot(rx,ry,color='tab:orange',label='Linear regression (unweighted)')
    
    # Weighted regression line
    coef=np.polyfit(x,y,1,w=w)
    print 'Weighted',coef[0],coef[1]
    ry=coef[1]+coef[0]*rx  # Highest power first
    plt.plot(rx,ry,color='tab:red',label='Linear regression (weighted by total votes)')
    rw=corr(x,y,w)
    
    ax=plt.gca()
    vals=ax.get_yticks()
    ax.set_yticklabels(['{:,.1f}%'.format(x) for x in vals])
    vals=ax.get_xticks()
    ax.set_xticklabels(['{:,.1f}%'.format(x) for x in vals])

    regionsUsed=sorted(list(set([UKCovid19Data.whichRegion(k) for k in interesting])))
    handles,labels = ax.get_legend_handles_labels()
    handles.extend([matplotlib.patches.Patch(color=UKCovid19Data.colorsByRegion[k],label=k) for k in regionsUsed])
    plt.legend(handles=handles,loc='upper left',prop={'size':6})

    return r.rvalue,rw
示例#3
0
    regionsUsed=sorted(list(set([UKCovid19Data.whichRegion(k) for k in interesting])))
    handles,labels = ax.get_legend_handles_labels()
    handles.extend([matplotlib.patches.Patch(color=UKCovid19Data.colorsByRegion[k],label=k) for k in regionsUsed])
    plt.legend(handles=handles,loc='upper left',prop={'size':6})

    return r.rvalue,rw

# Each entry is (area selector passed to getUKCovid19Data, window in days, printed label).
# The trailing comment holds further entries that are currently disabled.
plots=[('England',7,'England'),(None,7,'England, Scotland and Wales')]   #,('Scotland',7,'Scotland'),('Wales',5,'Wales')
for p in range(len(plots)):

    what=plots[p]
    print what[2]

    window=what[1]

    timeseries,dates,codes=UKCovid19Data.getUKCovid19Data(what[0],window+1,None)   # Need 8 days to get 7 growth rates.

    # Dump every series that was loaded: code, name and raw values.
    print len(timeseries),'timeseries'
    for c in timeseries.keys():
        print '  ',c,codes[c],timeseries[c]

    # The loaded area codes drive the code-rewrite table and the vote lookup.
    interesting=frozenset(timeseries.keys())
    codeRewrites=UKCovid19Data.getUKCodeRewrites(interesting)
    votesTotal,votesLeave=getVotesLeave(codeRewrites,interesting)

    # Couple of fixups to census data
    # (added after the vote lookup, so they only affect getDemographics below)
    codeRewrites['E06000048']='E06000057' # Northumberland
    codeRewrites['E08000020']='E08000037' # Gateshead
    
    populationTotal,populationAged=getDemographics(codeRewrites,interesting)
    # Ratio of the "aged" population to the total population for each area.
    # NOTE(review): under Python 2 this is integer division if both values are ints -
    # confirm getDemographics returns floats.
    oldies={k:populationAged[k]/populationTotal[k] for k in populationTotal.keys()}
示例#4
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import distutils.dir_util
import math
import numpy as np

import UKCovid19Data

# Each tuple is (first argument to getUKCovid19Data, window in days, third
# argument to getUKCovid19Data); the last entry passes None as the first.
for what in [('England', 7, None), ('Scotland', 7, None), ('Wales', 7, None),
             (None, 7, None)]:

    # Ask for window+1 days so that `window` day-on-day steps are covered.
    timeseries, days, codes = UKCovid19Data.getUKCovid19Data(
        what[0], what[1] + 1, what[2])

    print '------'
    print what[0], days[0], days[-1], len(days)

    # Fail fast if the loader returned a different number of days.
    assert len(days) == what[1] + 1

    # Rank areas by their most recent value, largest first.
    print 'Top 20 case counts'
    for k in sorted(timeseries, key=lambda k: timeseries[k][-1],
                    reverse=True)[:20]:
        print '  {:32s}: {:d}'.format(codes[k], int(timeseries[k][-1]))

    print

    window = what[1]
    growth = {
        k: (timeseries[k][-1] / timeseries[k][-1 - window])**(1.0 / window)
        for k in timeseries if timeseries[k][-1 - window] > 0.0
示例#5
0
def probe(filename,column,what,lowerTierPopulation):
    
    income,population=getIncome(filename,column,what,lowerTierPopulation)

    print what

    for k in rate.keys():
        if not k in income:
            print 'No income for',k,codes[k]
    
    print '  Highest'
    for k in sorted(rate.keys(),key=lambda k: income[k],reverse=True)[:10]:
        print '    {:32s}: {:.2f}'.format(codes[k],income[k])
    print '  Lowest'
    for k in sorted(rate.keys(),key=lambda k: income[k],reverse=False)[:10]:
        print '    {:32s}: {:.2f}'.format(codes[k],income[k])

    interesting=sorted(rate.keys(),key=lambda k: population[k],reverse=True)
    
    y=np.array([100.0*rate[k] for k in interesting])
    x=np.array([income[k] for k in interesting])

    c=[UKCovid19Data.colorsByRegion[UKCovid19Data.whichRegion(k)] for k in interesting]

    w=np.array([population[k] for k in interesting])
    s=np.sqrt(w/50.0)
    
    fig=plt.figure(figsize=(8,6))
    plt.scatter(x,y,color=c,alpha=0.8,s=s)
    r=scipy.stats.linregress(x,y)
    gradient,intercept,r_value,p_value,std_err=r
    
    rx=np.linspace(min(x),max(x),100)
    ry=gradient*rx+intercept
    plt.plot(rx,ry,color='tab:orange',label='Linear regression (unweighted)')

    coef=np.polyfit(x,y,1,w=w)
    ry=coef[1]+coef[0]*rx
    plt.plot(rx,ry,color='tab:red',label='Linear regression (weighted)')
    rw=corr(x,y,w)

    coef=np.polyfit(x,y,2,w=w)
    qy=coef[2]+coef[1]*rx+coef[0]*rx**2
    plt.plot(rx,qy,color='tab:green',label='Quadratic best fit (weighted)')

    ax=plt.gca()
    vals=ax.get_yticks()
    ax.set_yticklabels(['{:,.1f}%'.format(x) for x in vals])

    plt.ylabel('Daily % increase rate\n({} to {})'.format(dates[0],dates[-1]))
    plt.xlabel(what)

    # plt.xscale('symlog') # Meh.

    regionsUsed=sorted(list(set([UKCovid19Data.whichRegion(k) for k in interesting])))
    handles,labels = ax.get_legend_handles_labels()
    handles.extend([matplotlib.patches.Patch(color=UKCovid19Data.colorsByRegion[k],label=k) for k in regionsUsed])
    plt.legend(handles=handles,loc='upper right',prop={'size':6})

    plt.title('England, Scotland and Wales UTLAs: {}\nr={:.3f} (weighted), r={:.3f} (unweighted),'.format(filename,rw,r_value))

    distutils.dir_util.mkpath('output')
    plt.savefig('output/income-{}.png'.format(filename),dpi=96)

    correlation[what]=r_value
示例#6
0
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats

import UKCovid19Data

def cov(x, y, w):
    """Return the covariance of x and y with observation weights w.

    Each series is centred on its own w-weighted mean; the weighted sum of
    the products of deviations is then divided by the total weight.
    """
    x_dev = x - np.average(x, weights=w)
    y_dev = y - np.average(y, weights=w)
    return np.sum(w * x_dev * y_dev) / np.sum(w)

def corr(x, y, w):
    """Return the correlation coefficient of x and y with weights w.

    This is the weighted covariance divided by the square root of the
    product of the two weighted variances.
    """
    covariance = cov(x, y, w)
    spread = np.sqrt(cov(x, x, w) * cov(y, y, w))
    return covariance / spread

window=7

timeseries,dates,codes=UKCovid19Data.getUKCovid19Data(None,window+1,None)   # Need 8 days to get 7 growth rates.

print '***',dates

interesting=frozenset(timeseries.keys())
codeRewrites=UKCovid19Data.getUKCodeRewrites(interesting)
    
rate={k:(timeseries[k][-1]/timeseries[k][-1-window])**(1.0/(window))-1.0 for k in timeseries.keys() if timeseries[k][-1-window]>0.0}

for k in sorted(rate.keys(),key=lambda k: rate[k],reverse=True):
    print k,codes[k],rate[k]

def getLowerTierPopulation():

    csvfile=open('data/income/Population-Table 1.csv','rb')
    reader=csv.reader(csvfile)
示例#7
0
# at
# https://www.gov.uk/government/statistics/english-indices-of-deprivation-2019

import csv
import distutils.dir_util
import math
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats

import UKCovid19Data

window = 7

timeseries, dates, codes = UKCovid19Data.getUKCovid19Data(
    'England', window + 1, None)  # Need 8 days to get 7 growth rates.

rate = {
    k: (timeseries[k][-1] / timeseries[k][-1 - window])**(1.0 / (window)) - 1.0
    for k in timeseries.keys() if timeseries[k][-1 - window] > 0.0
}

for k in sorted(rate.keys(), key=lambda k: rate[k], reverse=True):
    print k, codes[k], rate[k]


def getDeprivation(filename, column, what):
    csvfile = open('data/deprivation/{}-Table 1.csv'.format(filename), 'rb')
    reader = csv.reader(csvfile)
    firstRow = True
示例#8
0
    fig = plt.figure(figsize=(16, 9))

    mdayslo = None
    mdayshi = None

    texts = []
    totalbase = 0.0
    numbase = 0

    for what in [
        ('England', None, datetime.date(2020, 3, 8)),
        ('Scotland', None, datetime.date(2020, 3, 8)),
        ('Wales', None, datetime.date(2020, 3, 21))
    ]:  # TODO: Northern Ireland data starts 26th March... but gappy 28th&29th?

        timeseries, days, codes = UKCovid19Data.getUKCovid19Data(
            *what, skip=set(['E06000017']))

        mdays = [mdates.date2num(d) for d in days]

        z = 0
        for k in sorted(timeseries.keys(),
                        key=lambda k: timeseries[k][-1],
                        reverse=False
                        ):  # Plot highest current case counts with higher z
            cases = np.array([y for y in timeseries[k]])

            if chart == 4 or chart == 5:
                cases = active(cases)

            assert len(days) == len(cases)
示例#9
0
            areas[lad].add(pcon)
            areas[lad].add(ward)

        if pcon in interesting:
            areas[pcon].add(pcon)
            areas[pcon].add(ward)

        if ward in interesting:
            areas[ward].add(ward)

    return dict(areas)


window = 7

timeseries, dates, codes = UKCovid19Data.getUKCovid19Data(
    'England', window + 1, None)

rate = {
    k: (timeseries[k][-1] / timeseries[k][-1 - window])**(1.0 / (window)) - 1.0
    for k in timeseries.keys() if timeseries[k][-1 - window] > 0.0
}
interesting = frozenset(rate.keys())
print len(interesting), 'interesting areas (from growth rate)'
areas = getAreas(interesting)

rawvotes = {2017: getRawVotes(2017), 2019: getRawVotes(2019)}

votes = {2017: {}, 2019: {}}

for year in [2017, 2019]:
    for c in interesting: