-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate-postcode-db.py
82 lines (67 loc) · 3.16 KB
/
generate-postcode-db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import sys
# Extend the import path before importing the project settings
# (presumably the `mysite` package lives under ./django/ - confirm).
sys.path.append("./django/")
import mysite.settings
import os
# Only sets DJANGO_SETTINGS_MODULE when it is not already in the environment.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "mysite.settings")
from django.core.management import setup_environ
from django.db import transaction
# Configure Django with the project settings; this runs before the app's
# models are imported on the next line, so the order of these statements
# matters.
setup_environ(mysite.settings)
import waw_app.models
import re
import time
# NOTE(review): `sys` is already imported at the top of this block; this
# repeat import is harmless.
import sys
# NOTE(review): the shapely names, `re` and `Pool` are not referenced by the
# code visible in this file - confirm before removing.
from shapely.geometry.polygon import LinearRing,Polygon
from shapely.geometry import Point
from ostn02python.eastings_to_decimal_degrees import postcodes_to_points
from utils import get_constituency_list
from multiprocessing import Pool
def guess_constituency(postcode, point, constituencies):
    """Return the name of the constituency whose boundary is nearest to point.

    Intended as a fallback for points that no constituency contains.

    Args:
        postcode: The postcode being resolved. Not used by the search; kept
            so the existing call signature still works.
        point: The location to measure from; passed unchanged to each
            boundary's ``distance`` method.
        constituencies: Mapping of constituency name to an object exposing
            ``distance(point)`` (presumably shapely geometries - confirm
            against ``get_constituency_list``).

    Returns:
        The key of the boundary with the smallest distance to ``point``
        (the first one encountered wins ties), or ``None`` when
        ``constituencies`` is empty.
    """
    best_guess = None
    # Start from infinity rather than a large magic number, so a candidate is
    # always chosen no matter how large the distances are.
    min_distance_so_far = float("inf")
    for name, boundary in constituencies.items():
        # Measure once per candidate; the distance call may be expensive.
        distance = boundary.distance(point)
        if distance < min_distance_so_far:
            best_guess = name
            min_distance_so_far = distance
    return best_guess
@transaction.commit_on_success
def map_postcodes_to_constituencies(postcode_file, constituencies, ids):
    """Create a Postcode row for every postcode read from postcode_file.

    Each postcode's point is matched to a constituency in three steps:
    first the most recently matched constituencies are retried, then every
    constituency is scanned, and finally the nearest constituency is used
    as a best guess. The whole function is wrapped in
    ``transaction.commit_on_success``.

    Args:
        postcode_file: Path of the file to read; the open file object is
            handed to ``postcodes_to_points``, which yields
            ``(postcode, point)`` pairs.
        constituencies: Mapping of constituency name to a boundary object
            exposing ``contains(point)`` and ``distance(point)``
            (presumably shapely geometries - confirm).
        ids: Dict used as a cache from constituency name to its
            ``Constituency`` model instance. It is updated in place.

    Raises:
        IndexError: If no ``Constituency`` row matches a resolved name
            (from indexing the empty queryset).
    """
    # How many recently matched constituencies to retry before a full scan.
    max_recent = 5
    # Names of recently matched constituencies, oldest first, no duplicates.
    recent = []
    with open(postcode_file) as f:
        for postcode, point in postcodes_to_points(f):
            constituency = None
            # The postcodes are in alphabetical order - so it's relatively
            # likely that this postcode will be in the same constituency as
            # one of the last few postcodes. Check those first, newest first.
            for guess in reversed(recent):
                if constituencies[guess].contains(point):
                    constituency = guess
                    break
            # If that didn't work, we should loop through every constituency.
            if constituency is None:
                for name, boundary in constituencies.items():
                    if boundary.contains(point):
                        constituency = name
                        break
            # Sometimes we don't get a result, usually because of postcodes
            # on Scottish islands which fall outside the constituency
            # boundaries we're using. Make a best guess.
            if constituency is None:
                constituency = guess_constituency(postcode, point, constituencies)
            if constituency not in ids:
                ids[constituency] = waw_app.models.Constituency.objects.filter(
                    name__exact=constituency)[0]
            # objects.create() already saves the new row, so no separate
            # save() call is needed.
            waw_app.models.Postcode.objects.create(
                postcode=postcode, constituency=ids[constituency])
            # Move this constituency to the most-recent end and keep the
            # list bounded so the shortcut stays cheap.
            if constituency in recent:
                recent.remove(constituency)
            recent.append(constituency)
            del recent[:-max_recent]
if __name__ == "__main__":
    # Load the constituencies once and reuse them for every postcode file.
    constituencies = get_constituency_list()
    postcode_dir = "./raw_data/postcode_files/"
    for entry in os.listdir(postcode_dir):
        print(entry)
        started = time.time()
        # Each file is processed with its own, initially empty, ids cache.
        map_postcodes_to_constituencies(postcode_dir + entry, constituencies, {})
        # Report how long this file took, in seconds.
        print(time.time() - started)