def probe(filename, column, what):
    """Chart recent case growth against one deprivation measure.

    Looks the measure up with ``getDeprivation(filename, column, what)``,
    prints the ten areas with the highest and the ten with the lowest values,
    scatter-plots every area present in the module-level ``rate`` dict
    (deprivation on x, daily growth in percent on y, coloured by region),
    overlays a linear regression and a quadratic best fit, and saves the
    chart under ``output/``.

    Args:
        filename: Passed to ``getDeprivation``; also used to build the output
            image name, with '(%)' replaced by 'percentage'.
        column: Passed straight through to ``getDeprivation``.
        what: Human-readable name of the measure; printed, used as the x-axis
            label and in the title, and used as the key in ``correlation``.

    Side effects:
        Prints to stdout, creates the ``output`` directory if needed, writes
        ``output/deprivation-<filename>.png`` and stores the linear
        regression's r value in the module-level ``correlation[what]``.
        The figure is left open after saving.

    Relies on the module-level names ``rate``, ``codes``, ``dates`` and
    ``correlation``.
    """
    from matplotlib.ticker import FuncFormatter

    deprivation = getDeprivation(filename, column, what)

    # Snapshot the area codes once so every array below shares one ordering.
    areas = list(rate.keys())

    print(what)
    print(' Highest')
    for area in sorted(areas, key=lambda a: deprivation[a], reverse=True)[:10]:
        print(' {:32s}: {:.2f}'.format(codes[area], deprivation[area]))
    print(' Lowest')
    for area in sorted(areas, key=lambda a: deprivation[a], reverse=False)[:10]:
        print(' {:32s}: {:.2f}'.format(codes[area], deprivation[area]))

    y = np.array([100.0 * rate[area] for area in areas])
    x = np.array([deprivation[area] for area in areas])
    c = [
        UKCovid19Data.colorsByRegion[UKCovid19Data.whichRegion(area)]
        for area in areas
    ]

    plt.figure(figsize=(8, 6))
    plt.scatter(x, y, color=c, alpha=0.8)

    # Straight-line fit; only the slope, intercept and r value are used.
    r = scipy.stats.linregress(x, y)
    gradient, intercept, r_value, _p_value, _std_err = r
    rx = np.linspace(min(x), max(x), 100)
    ry = gradient * rx + intercept
    plt.plot(rx, ry, color='tab:red', label='Linear regression')

    # np.polyfit returns coefficients with the highest power first.
    coef = np.polyfit(x, y, 2)
    qy = coef[2] + coef[1] * rx + coef[0] * rx**2
    plt.plot(rx, qy, color='tab:green', label='Quadratic best fit')

    # Format ticks through a formatter rather than freezing label strings with
    # set_yticklabels(): fixed strings stop matching if the ticks ever move.
    ax = plt.gca()
    ax.yaxis.set_major_formatter(
        FuncFormatter(lambda value, _pos: '{:,.1f}%'.format(value)))

    plt.ylabel('Daily % increase rate\n({} to {})'.format(dates[0], dates[-1]))
    plt.xlabel(what)
    plt.title('Deprivation: {}\nr={:.3f}'.format(what, r_value))

    # Legend: the two fitted lines plus one colour patch per region plotted.
    regionsUsed = sorted(set(UKCovid19Data.whichRegion(area) for area in areas))
    handles, _labels = ax.get_legend_handles_labels()
    handles.extend(
        matplotlib.patches.Patch(color=UKCovid19Data.colorsByRegion[region],
                                 label=region)
        for region in regionsUsed)
    plt.legend(handles=handles, loc='upper left', prop={'size': 6})

    distutils.dir_util.mkpath('output')
    plt.savefig('output/deprivation-{}.png'.format(
        filename.replace('(%)', 'percentage')),
                dpi=96)

    correlation[what] = r_value
def plot(x, y, c, w, s, interesting):
    """Scatter-plot y against x with unweighted and weighted linear fits.

    Opens a new 8x6 figure, draws the points, overlays an unweighted
    ``scipy.stats.linregress`` line and a degree-1 ``np.polyfit`` line fitted
    with weights ``w``, formats both axes as percentages and adds a legend
    containing the fitted lines plus one colour patch per region.  Titles,
    axis labels and saving are left to the caller.

    Args:
        x: x values of the points.
        y: y values of the points.
        c: Marker colours, passed to ``plt.scatter``.
        w: Per-point weights for the weighted fit and weighted correlation.
        s: Marker sizes, passed to ``plt.scatter``.
        interesting: Area codes; each is mapped through
            ``UKCovid19Data.whichRegion`` to decide which region colour
            patches appear in the legend.

    Returns:
        Tuple ``(r, rw)``: the unweighted correlation coefficient from
        ``linregress`` and the weighted correlation from ``corr(x, y, w)``.

    Side effects:
        Prints the coefficients of both fits to stdout.
    """
    from matplotlib.ticker import FuncFormatter

    plt.figure(figsize=(8, 6))
    plt.scatter(x, y, s=s, color=c, alpha=0.8)

    # Unweighted regression line
    r = scipy.stats.linregress(x, y)
    gradient, intercept, _r_value, _p_value, _std_err = r
    # '%s' applies str() to each value, matching what a print statement emits.
    print('Unweighted %s %s' % (gradient, intercept))
    rx = np.linspace(min(x), max(x), 100)
    ry = gradient * rx + intercept
    plt.plot(rx, ry, color='tab:orange', label='Linear regression (unweighted)')

    # Weighted regression line
    # NOTE(review): np.polyfit applies w to the *unsquared* residuals, so raw
    # totals here weight squared errors by w**2, whereas corr() weights by w.
    # Confirm whether np.sqrt(w) was intended.
    coef = np.polyfit(x, y, 1, w=w)
    print('Weighted %s %s' % (coef[0], coef[1]))
    ry = coef[1] + coef[0] * rx  # Highest power first
    plt.plot(rx, ry, color='tab:red',
             label='Linear regression (weighted by total votes)')

    rw = corr(x, y, w)

    # Use formatters instead of freezing label strings with set_*ticklabels():
    # the caller keeps working on this figure after we return, and fixed
    # strings stop matching as soon as the ticks move.  Each axis gets its own
    # formatter instance.
    ax = plt.gca()
    ax.yaxis.set_major_formatter(
        FuncFormatter(lambda value, _pos: '{:,.1f}%'.format(value)))
    ax.xaxis.set_major_formatter(
        FuncFormatter(lambda value, _pos: '{:,.1f}%'.format(value)))

    # Legend: the fitted lines plus one colour patch per region present.
    regionsUsed = sorted(set(UKCovid19Data.whichRegion(k) for k in interesting))
    handles, _labels = ax.get_legend_handles_labels()
    handles.extend(
        matplotlib.patches.Patch(color=UKCovid19Data.colorsByRegion[region],
                                 label=region)
        for region in regionsUsed)
    plt.legend(handles=handles, loc='upper left', prop={'size': 6})

    return r.rvalue, rw
regionsUsed=sorted(list(set([UKCovid19Data.whichRegion(k) for k in interesting]))) handles,labels = ax.get_legend_handles_labels() handles.extend([matplotlib.patches.Patch(color=UKCovid19Data.colorsByRegion[k],label=k) for k in regionsUsed]) plt.legend(handles=handles,loc='upper left',prop={'size':6}) return r.rvalue,rw plots=[('England',7,'England'),(None,7,'England, Scotland and Wales')] #,('Scotland',7,'Scotland'),('Wales',5,'Wales') for p in range(len(plots)): what=plots[p] print what[2] window=what[1] timeseries,dates,codes=UKCovid19Data.getUKCovid19Data(what[0],window+1,None) # Need 8 days to get 7 growth rates. print len(timeseries),'timeseries' for c in timeseries.keys(): print ' ',c,codes[c],timeseries[c] interesting=frozenset(timeseries.keys()) codeRewrites=UKCovid19Data.getUKCodeRewrites(interesting) votesTotal,votesLeave=getVotesLeave(codeRewrites,interesting) # Couple of fixups to census data codeRewrites['E06000048']='E06000057' # Northumberland codeRewrites['E08000020']='E08000037' # Gateshead populationTotal,populationAged=getDemographics(codeRewrites,interesting) oldies={k:populationAged[k]/populationTotal[k] for k in populationTotal.keys()}
#!/usr/bin/env python # -*- coding: utf-8 -*- import distutils.dir_util import math import numpy as np import UKCovid19Data for what in [('England', 7, None), ('Scotland', 7, None), ('Wales', 7, None), (None, 7, None)]: timeseries, days, codes = UKCovid19Data.getUKCovid19Data( what[0], what[1] + 1, what[2]) print '------' print what[0], days[0], days[-1], len(days) assert len(days) == what[1] + 1 print 'Top 20 case counts' for k in sorted(timeseries, key=lambda k: timeseries[k][-1], reverse=True)[:20]: print ' {:32s}: {:d}'.format(codes[k], int(timeseries[k][-1])) print window = what[1] growth = { k: (timeseries[k][-1] / timeseries[k][-1 - window])**(1.0 / window) for k in timeseries if timeseries[k][-1 - window] > 0.0
def probe(filename, column, what, lowerTierPopulation):
    """Chart recent case growth against one income measure.

    Obtains per-area income and population from
    ``getIncome(filename, column, what, lowerTierPopulation)``, reports any
    area in the module-level ``rate`` dict that has no income figure, prints
    the ten highest and ten lowest incomes, then scatter-plots daily growth
    (percent) against income with marker area scaled by population.  Three
    fits are overlaid: unweighted linear, population-weighted linear and
    population-weighted quadratic.

    Args:
        filename: Passed to ``getIncome``; also used in the chart title and
            the output image name.
        column: Passed straight through to ``getIncome``.
        what: Human-readable name of the measure; printed, used as the x-axis
            label, and used as the key in ``correlation``.
        lowerTierPopulation: Passed straight through to ``getIncome``.

    Side effects:
        Prints to stdout, creates the ``output`` directory if needed, writes
        ``output/income-<filename>.png`` and stores the *unweighted* r value
        in the module-level ``correlation[what]``.  The figure is left open
        after saving.

    Relies on the module-level names ``rate``, ``codes``, ``dates``,
    ``correlation`` and ``corr``.
    """
    from matplotlib.ticker import FuncFormatter

    income, population = getIncome(filename, column, what, lowerTierPopulation)

    print(what)

    # Diagnostic only: a missing area still raises KeyError in the code below.
    for k in rate.keys():
        if k not in income:
            # '%s' applies str(), matching what a print statement emits.
            print('No income for %s %s' % (k, codes[k]))

    print(' Highest')
    for k in sorted(rate.keys(), key=lambda a: income[a], reverse=True)[:10]:
        print(' {:32s}: {:.2f}'.format(codes[k], income[k]))
    print(' Lowest')
    for k in sorted(rate.keys(), key=lambda a: income[a], reverse=False)[:10]:
        print(' {:32s}: {:.2f}'.format(codes[k], income[k]))

    # Largest populations first -- presumably so that the big markers are
    # drawn first and the small ones end up on top of them.
    interesting = sorted(rate.keys(), key=lambda a: population[a], reverse=True)

    y = np.array([100.0 * rate[k] for k in interesting])
    x = np.array([income[k] for k in interesting])
    c = [
        UKCovid19Data.colorsByRegion[UKCovid19Data.whichRegion(k)]
        for k in interesting
    ]
    w = np.array([population[k] for k in interesting])
    s = np.sqrt(w / 50.0)  # marker size derived from population

    plt.figure(figsize=(8, 6))
    plt.scatter(x, y, color=c, alpha=0.8, s=s)

    # Unweighted straight-line fit.
    r = scipy.stats.linregress(x, y)
    gradient, intercept, r_value, _p_value, _std_err = r
    rx = np.linspace(min(x), max(x), 100)
    ry = gradient * rx + intercept
    plt.plot(rx, ry, color='tab:orange', label='Linear regression (unweighted)')

    # Population-weighted straight-line fit (coefficients: highest power
    # first).
    # NOTE(review): np.polyfit applies w to the *unsquared* residuals, so raw
    # populations here weight squared errors by w**2, whereas corr() weights
    # by w.  Confirm whether np.sqrt(w) was intended.
    coef = np.polyfit(x, y, 1, w=w)
    ry = coef[1] + coef[0] * rx
    plt.plot(rx, ry, color='tab:red', label='Linear regression (weighted)')

    rw = corr(x, y, w)

    # Population-weighted quadratic fit.
    coef = np.polyfit(x, y, 2, w=w)
    qy = coef[2] + coef[1] * rx + coef[0] * rx**2
    plt.plot(rx, qy, color='tab:green', label='Quadratic best fit (weighted)')

    # Format ticks through a formatter rather than freezing label strings with
    # set_yticklabels(): fixed strings stop matching if the ticks ever move.
    ax = plt.gca()
    ax.yaxis.set_major_formatter(
        FuncFormatter(lambda value, _pos: '{:,.1f}%'.format(value)))

    plt.ylabel('Daily % increase rate\n({} to {})'.format(dates[0], dates[-1]))
    plt.xlabel(what)
    # plt.xscale('symlog') # Meh.

    # Legend: the fitted lines plus one colour patch per region present.
    regionsUsed = sorted(set(UKCovid19Data.whichRegion(k) for k in interesting))
    handles, _labels = ax.get_legend_handles_labels()
    handles.extend(
        matplotlib.patches.Patch(color=UKCovid19Data.colorsByRegion[region],
                                 label=region)
        for region in regionsUsed)
    plt.legend(handles=handles, loc='upper right', prop={'size': 6})

    plt.title('England, Scotland and Wales UTLAs: {}\nr={:.3f} (weighted), r={:.3f} (unweighted),'.format(filename, rw, r_value))

    distutils.dir_util.mkpath('output')
    plt.savefig('output/income-{}.png'.format(filename), dpi=96)

    correlation[what] = r_value
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats

import UKCovid19Data


def cov(x, y, w):
    """Return the weighted covariance of x and y.

    Each series is centred on its own w-weighted average; the w-weighted sum
    of the products of the deviations is then divided by the sum of the
    weights.  The arithmetic is element-wise, so x, y and w are expected to
    be NumPy arrays of matching shape.
    """
    return np.sum(w * (x - np.average(x, weights=w)) * (y - np.average(y, weights=w))) / np.sum(w)


def corr(x, y, w):
    """Return the weighted correlation coefficient of x and y.

    Computed as cov(x, y, w) divided by the square root of the product of
    the two weighted variances cov(x, x, w) and cov(y, y, w).
    """
    return cov(x, y, w) / np.sqrt(cov(x, x, w) * cov(y, y, w))


# Number of day-on-day growth steps the rate is averaged over.
window=7

timeseries,dates,codes=UKCovid19Data.getUKCovid19Data(None,window+1,None) # Need 8 days to get 7 growth rates.
print '***',dates

# Immutable set of every area code that has a time series.
interesting=frozenset(timeseries.keys())
codeRewrites=UKCovid19Data.getUKCodeRewrites(interesting)

# Average daily growth rate per area: the window-th root of the ratio between
# the last value and the value `window` days earlier, minus one.  Areas whose
# earlier value is not positive are left out, which avoids dividing by zero.
rate={k:(timeseries[k][-1]/timeseries[k][-1-window])**(1.0/(window))-1.0 for k in timeseries.keys() if timeseries[k][-1-window]>0.0}

# Dump the rates, fastest-growing area first.
for k in sorted(rate.keys(),key=lambda k: rate[k],reverse=True):
    print k,codes[k],rate[k]


# Opens the population table and wraps it in a csv reader; the remainder of
# this function lies beyond the visible excerpt.
def getLowerTierPopulation():
    csvfile=open('data/income/Population-Table 1.csv','rb')
    reader=csv.reader(csvfile)
# at # https://www.gov.uk/government/statistics/english-indices-of-deprivation-2019 import csv import distutils.dir_util import math import matplotlib import matplotlib.pyplot as plt import numpy as np import scipy.stats import UKCovid19Data window = 7 timeseries, dates, codes = UKCovid19Data.getUKCovid19Data( 'England', window + 1, None) # Need 8 days to get 7 growth rates. rate = { k: (timeseries[k][-1] / timeseries[k][-1 - window])**(1.0 / (window)) - 1.0 for k in timeseries.keys() if timeseries[k][-1 - window] > 0.0 } for k in sorted(rate.keys(), key=lambda k: rate[k], reverse=True): print k, codes[k], rate[k] def getDeprivation(filename, column, what): csvfile = open('data/deprivation/{}-Table 1.csv'.format(filename), 'rb') reader = csv.reader(csvfile) firstRow = True
fig = plt.figure(figsize=(16, 9)) mdayslo = None mdayshi = None texts = [] totalbase = 0.0 numbase = 0 for what in [ ('England', None, datetime.date(2020, 3, 8)), ('Scotland', None, datetime.date(2020, 3, 8)), ('Wales', None, datetime.date(2020, 3, 21)) ]: # TODO: Northern Ireland data starts 26th March... but gappy 28th&29th? timeseries, days, codes = UKCovid19Data.getUKCovid19Data( *what, skip=set(['E06000017'])) mdays = [mdates.date2num(d) for d in days] z = 0 for k in sorted(timeseries.keys(), key=lambda k: timeseries[k][-1], reverse=False ): # Plot highest current case counts with higher z cases = np.array([y for y in timeseries[k]]) if chart == 4 or chart == 5: cases = active(cases) assert len(days) == len(cases)
areas[lad].add(pcon) areas[lad].add(ward) if pcon in interesting: areas[pcon].add(pcon) areas[pcon].add(ward) if ward in interesting: areas[ward].add(ward) return dict(areas) window = 7 timeseries, dates, codes = UKCovid19Data.getUKCovid19Data( 'England', window + 1, None) rate = { k: (timeseries[k][-1] / timeseries[k][-1 - window])**(1.0 / (window)) - 1.0 for k in timeseries.keys() if timeseries[k][-1 - window] > 0.0 } interesting = frozenset(rate.keys()) print len(interesting), 'interesting areas (from growth rate)' areas = getAreas(interesting) rawvotes = {2017: getRawVotes(2017), 2019: getRawVotes(2019)} votes = {2017: {}, 2019: {}} for year in [2017, 2019]: for c in interesting: