def intersecting_blocks(block):
    """
    Returns a list of blocks that intersect the given one.

    Note that blocks with the same street name and suffix are excluded --
    this is a heuristic that keeps the adjacent blocks of the same street
    out.

    Each item of the returned list is a ``(block, intersection_pt)`` tuple:
    the intersecting Block, and the geometry that ``fromstr()`` builds from
    the ``ST_Intersection`` column of that row. Only rows whose
    intersection is a POINT are returned.
    """
    meta = block._meta
    table = meta.db_table
    # Select by database column rather than by attribute name, so the
    # query stays valid for any field whose column differs from its name.
    select_list = ["b.%s" % f.column for f in meta.fields]
    select_list.append("ST_Intersection(a.geom, b.geom)")
    # Only identifiers taken from the model metadata are interpolated
    # here; the block id is passed as a bound parameter (the %%s).
    sql = """
        SELECT %s
        FROM %s a, %s b
        WHERE a.id = %%s
            AND ST_Intersects(a.geom, b.geom)
            AND GeometryType(ST_Intersection(a.geom, b.geom)) = 'POINT'
            AND NOT (b.street = a.street AND b.suffix = a.suffix)
        ORDER BY b.predir, b.street, b.suffix, b.left_from_num, b.right_from_num
        """ % (", ".join(select_list), table, table)
    cursor = connection.cursor()
    cursor.execute(sql, [block.id])
    intersections = []
    for row in cursor.fetchall():
        # The last column is the intersection geometry; everything before
        # it is the positional field values of the intersecting block.
        other_block = Block(*row[:-1])
        intersection_pt = fromstr(row[-1])
        intersections.append((other_block, intersection_pt))
    return intersections
def reverse_geocode(point):
    """
    Looks up the nearest block to the point.

    Argument can be either a Point instance, or an (x, y) tuple or list,
    or a WKT string.

    Returns (block, distance). The distance is the value PostGIS
    ST_Distance() reports between the point and the block geometry;
    presumably degrees, since the point is passed with SRID 4326 -- TODO
    confirm.

    Raises ReverseGeocodeError if no block is within the search radius.
    """
    # Defer import to avoid cyclical import.
    from ebpub.streets.models import Block

    if isinstance(point, basestring):
        from django.contrib.gis.geos import fromstr
        point = fromstr(point, srid=4326)
    elif isinstance(point, (tuple, list)):
        point = Point(tuple(point))

    # In degrees for now because transforming to a projected space is
    # too slow for this purpose. TODO: store projected versions of the
    # locations alongside the canonical lng/lat versions.
    min_distance = 0.007

    # We use min_distance to cut down on the searchable space, because
    # the distance query we do next that actually compares distances
    # between geometries does not use the spatial index. TODO: convert
    # this to GeoDjango syntax. Should be possible but there are some
    # subtleties / performance issues with the DB API.
    fields = Block._meta.fields
    identifiers = {
        'field_list': ', '.join([f.column for f in fields]),
        'geom_fieldname': 'geom',
        'tablename': Block._meta.db_table,
    }
    # Only identifiers from the model metadata are interpolated into the
    # SQL text. The point (as WKT) and the search radius are passed as
    # bound parameters (the %%s placeholders), so their content can never
    # be mistaken for SQL or for a '%' format directive.
    sql = """
        SELECT %(field_list)s,
               ST_Distance(ST_GeomFromText(%%s, 4326), %(geom_fieldname)s) AS "dist"
        FROM %(tablename)s
        WHERE id IN
            (SELECT id
             FROM %(tablename)s
             WHERE ST_DWithin(%(geom_fieldname)s, ST_GeomFromText(%%s, 4326), %%s))
        ORDER BY "dist"
        LIMIT 1;
        """ % identifiers
    pt_wkt = point.wkt
    cursor = connection.cursor()
    cursor.execute(sql, [pt_wkt, pt_wkt, min_distance])

    # The query is limited to one row, so a single fetch is enough.
    row = cursor.fetchone()
    if row is None:
        raise ReverseGeocodeError('No results')
    # The leading columns are the Block fields, in model order; the last
    # column is the computed distance.
    return Block(*row[:len(fields)]), row[-1]
def save(self, verbose=True):
    """
    Save a Block for every block generated from the layer's features.

    Features for which ``self.skip_feature()`` is true are ignored. When a
    feature yields more than one block, the first block saved becomes the
    parent of the others (its id is stored in their ``parent_id``).

    If ``verbose`` is true, a line is printed for each block saved.

    Returns the number of blocks saved.
    """
    number_keys = ('left_from_num', 'left_to_num',
                   'right_from_num', 'right_to_num')
    name_keys = ('predir', 'street', 'suffix', 'postdir')
    created = 0
    for feature in self.layer:
        if self.skip_feature(feature):
            continue
        first_block_id = None
        # Usually (at least in Boston data) there is only 1 block per
        # feature. But sometimes there are multiple names for one
        # street, eg. "N. Commercial Wharf" and "Commercial Wharf N.";
        # in that case those would be yielded by gen_blocks() as
        # two separate blocks. Is that intentional, or a bug?
        for attrs in self.gen_blocks(feature):
            new_block = Block(**attrs)
            new_block.geom = feature.geom.geos
            numbers = [attrs[key] for key in number_keys]
            new_block.from_num, new_block.to_num = make_block_numbers(*numbers)
            names = [attrs[key] for key in name_keys]
            street_pretty, block_pretty = make_pretty_name(*(numbers + names))
            new_block.pretty_name = block_pretty
            new_block.street_pretty_name = street_pretty
            slug_source = ' '.join((attrs['street'], attrs['suffix']))
            new_block.street_slug = slugify(slug_source)
            new_block.save()
            if first_block_id is not None:
                # Not the first block of this feature: link it to the
                # first one, which needs a second save.
                new_block.parent_id = first_block_id
                new_block.save()
            else:
                first_block_id = new_block.id
            created += 1
            if verbose:
                print('%d\tCreated block %s for feature %d' % (created, new_block, feature.get('TLID')))
    return created
def _get_block(self):
    """
    Return a Block close to ``self.block_center``.

    Returns None when ``self.block_center`` is None. Raises
    ``Block.DoesNotExist`` when no block intersects the buffered center.
    """
    center = self.block_center
    if center is None:
        return None
    # We buffer the center a bit because exact intersection
    # doesn't always get a match.
    from ebpub.utils.mapmath import buffer_by_meters
    geom = buffer_by_meters(center, BLOCK_FUZZY_DISTANCE_METERS)
    # If there's more than one this close, we don't really care which one
    # we get, so only fetch a single row instead of loading every match.
    blocks = list(Block.objects.filter(geom__intersects=geom)[:1])
    if not blocks:
        raise Block.DoesNotExist(
            "No block found at lat %s, lon %s" % (center.y, center.x))
    return blocks[0]
def test_add_by_place_id(self, mock_get_object_or_404):
    """A 'b:...' place id ends up as a BlockFilter under chain['location']."""
    chain = FilterChain()
    from ebpub.streets.models import Block
    from ebpub.db.schemafilters import BlockFilter
    block_attrs = dict(
        city='city',
        street_slug='street_slug',
        pretty_name='pretty_name',
        street_pretty_name='street_pretty_name',
        street='street',
        from_num='123',
        to_num='456',
    )
    # The patched lookup hands back this unsaved Block.
    mock_get_object_or_404.return_value = Block(**block_attrs)
    chain.add_by_place_id('b:123.1')
    location_filter = chain['location']
    self.assert_(isinstance(location_filter, BlockFilter))
def reverse_geocode(point):
    """
    Looks up the nearest block to the point.

    ``point`` must provide a ``wkb`` attribute, which is sent to the
    database with SRID 4326.

    Returns (block, distance), where distance is the value ST_Distance()
    reports between the point and the block's geometry column.

    Raises ReverseGeocodeError if no block is within the search radius.
    """
    # In degrees for now because transforming to a projected space is
    # too slow for this purpose. TODO: store projected versions of the
    # locations alongside the canonical lng/lat versions.
    min_distance = 0.007

    # We use min_distance to cut down on the searchable space, because
    # the distance query we do next that actually compares distances
    # between geometries does not use the spatial index. TODO: convert
    # this to GeoDjango syntax. Should be possible but there are some
    # subtleties / performance issues with the DB API.
    fields = Block._meta.fields
    identifiers = {
        'field_list': ', '.join([f.column for f in fields]),
        'geom_fieldname': 'location',
        'tablename': Block._meta.db_table,
    }
    # Only identifiers from the model metadata are interpolated into the
    # SQL text. The WKB and the radius are bound parameters (the %%s
    # placeholders): formatting the quoted binary literal straight into
    # the statement breaks as soon as it contains a '%' character.
    sql = """
        SELECT %(field_list)s,
               ST_Distance(ST_GeomFromWKB(%%s, 4326), %(geom_fieldname)s) AS "dist"
        FROM %(tablename)s
        WHERE id IN
            (SELECT id
             FROM %(tablename)s
             WHERE ST_DWithin(%(geom_fieldname)s, ST_GeomFromWKB(%%s, 4326), %%s))
        ORDER BY "dist"
        LIMIT 1;
        """ % identifiers
    pt_wkb = Binary(point.wkb)
    cursor = connection.cursor()
    cursor.execute(sql, [pt_wkb, pt_wkb, min_distance])

    # The query is limited to one row, so a single fetch is enough.
    row = cursor.fetchone()
    if row is None:
        raise ReverseGeocodeError()
    # The leading columns are the Block fields, in model order; the last
    # column is the computed distance.
    return Block(*row[:len(fields)]), row[-1]
def save(self, verbose=True):
    """
    Save a Block for every block generated from the layer's features.

    Features for which ``self.skip_feature()`` is true are ignored. When a
    feature yields more than one block, the first block saved becomes the
    parent of the others (its id is stored in their ``parent_id``).

    If ``verbose`` is true, a line is printed for each block saved.

    Returns the number of blocks saved.
    """
    pretty_keys = ('left_from_num', 'left_to_num',
                   'right_from_num', 'right_to_num',
                   'predir', 'street', 'suffix', 'postdir')
    created = 0
    for feature in self.layer:
        if self.skip_feature(feature):
            continue
        first_block_id = None
        for attrs in self.gen_blocks(feature):
            new_block = Block(**attrs)
            new_block.geom = feature.geom.geos
            pretty_args = [attrs[key] for key in pretty_keys]
            street_pretty, block_pretty = make_pretty_name(*pretty_args)
            new_block.pretty_name = block_pretty
            new_block.street_pretty_name = street_pretty
            slug_source = ' '.join((attrs['street'], attrs['suffix']))
            new_block.street_slug = slugify(slug_source)
            new_block.save()
            if first_block_id is not None:
                # Not the first block of this feature: link it to the
                # first one, which needs a second save.
                new_block.parent_id = first_block_id
                new_block.save()
            else:
                first_block_id = new_block.id
            created += 1
            if verbose:
                print('Created block %s' % new_block)
    return created
def save(self):
    """
    Create a Block for each usable LINESTRING feature in ``self.layer``.

    A feature is skipped when neither side of the street has both a
    "from" and a "to" address number, when its geometry is not a
    LINESTRING, when it has no street name, or when the street name is a
    bare number with no suffix.

    Returns the number of blocks saved.
    """
    address_keys = ('left_from_num', 'left_to_num',
                    'right_from_num', 'right_to_num')
    num_created = 0
    for feature in self.layer:
        parent_id = None
        fields = {}
        for esri_fieldname, block_fieldname in FIELD_MAP.items():
            value = feature.get(esri_fieldname)
            if isinstance(value, basestring):
                value = value.upper()
            elif isinstance(value, int) and value == 0:
                # An integer zero is treated as "no value".
                value = None
            fields[block_fieldname] = value
        if not ((fields['left_from_num'] and fields['left_to_num']) or
                (fields['right_from_num'] and fields['right_to_num'])):
            continue
        # Sometimes the "from" number is greater than the "to"
        # number in the source data, so we swap them into proper
        # ordering.
        # NOTE(review): if these values are strings the comparison is
        # lexicographic rather than numeric -- confirm what
        # feature.get() returns for these fields.
        for side in ('left', 'right'):
            from_key, to_key = '%s_from_num' % side, '%s_to_num' % side
            if fields[from_key] > fields[to_key]:
                fields[from_key], fields[to_key] = fields[to_key], fields[from_key]
        if feature.geom.geom_name != 'LINESTRING':
            continue
        name_fields = {}
        for esri_fieldname, block_fieldname in NAME_FIELD_MAP.items():
            name_fields[block_fieldname] = feature.get(esri_fieldname).upper()
        if not name_fields['street']:
            continue
        # Skip blocks with bare number street names and no suffix / type
        if not name_fields['suffix'] and re.search(r'^\d+$', name_fields['street']):
            continue
        fields.update(name_fields)
        # Ensure we have unicode.
        for key, val in fields.items():
            if isinstance(val, str):
                fields[key] = val.decode(self.encoding)
        fields['street_pretty_name'], fields['pretty_name'] = make_pretty_name(
            fields['left_from_num'],
            fields['left_to_num'],
            fields['right_from_num'],
            fields['right_to_num'],
            fields['predir'],
            fields['street'],
            fields['suffix'],
            fields['postdir'],
        )
        fields['street_slug'] = slugify(u' '.join((fields['street'], fields['suffix'])))
        # Watch out for addresses like '247B' which can't be
        # saved as an IntegerField. But do this after making
        # pretty names.
        for addr_key in address_keys:
            number = fields[addr_key]
            # Only strings can carry a letter suffix. One side of the
            # street may legitimately be None (or the value may be an
            # int), and those have no rstrip().
            if isinstance(number, basestring):
                fields[addr_key] = number.rstrip(string.letters)
        fields['from_num'], fields['to_num'] = make_block_numbers(
            fields['left_from_num'],
            fields['left_to_num'],
            fields['right_from_num'],
            fields['right_to_num'])
        block = Block(**fields)
        block.geom = feature.geom.geos
        self.log(u'Looking at block %s' % fields['street'])
        block.save()
        # Only one block is created per feature here, so parent_id is
        # always None at this point; the else branch only matters if
        # several blocks are ever generated for one feature.
        if parent_id is None:
            parent_id = block.id
        else:
            block.parent_id = parent_id
            block.save()
        num_created += 1
        self.log('Created block %s' % block)
    return num_created
def save(self):
    """
    Create Blocks for each usable LINESTRING feature in ``self.layer``.

    A feature is skipped when its 'FCC' value does not match
    ``self.fcc_pat``, when neither side of the street has both a "from"
    and a "to" address number, or when its geometry is not a LINESTRING.

    One block is saved per street name found on the feature: the name
    fields are read once with no suffix and once for each of the
    suffixes '1' to '5'. Names that are empty, or that are a bare number
    with no street suffix, are skipped. The first block saved for a
    feature becomes the parent of the others (its id is stored in their
    ``parent_id``).

    Returns the number of blocks saved.
    """
    alt_names_suff = (u'', u'1', u'2', u'3', u'4', u'5')
    address_keys = ('left_from_num', 'left_to_num',
                    'right_from_num', 'right_to_num')
    num_created = 0
    for feature in self.layer:
        if not self.fcc_pat.search(feature.get('FCC')):
            continue
        parent_id = None
        fields = {}
        for esri_fieldname, block_fieldname in FIELD_MAP.items():
            value = feature.get(esri_fieldname)
            if isinstance(value, basestring):
                value = value.upper()
            elif isinstance(value, int) and value == 0:
                # An integer zero is treated as "no value".
                value = None
            fields[block_fieldname] = value
        if not ((fields['left_from_num'] and fields['left_to_num']) or
                (fields['right_from_num'] and fields['right_to_num'])):
            continue
        # Sometimes the "from" number is greater than the "to"
        # number in the source data, so we swap them into proper
        # ordering.
        # NOTE(review): if these values are strings the comparison is
        # lexicographic rather than numeric -- confirm what
        # feature.get() returns for these fields.
        for side in ('left', 'right'):
            from_key, to_key = '%s_from_num' % side, '%s_to_num' % side
            if fields[from_key] > fields[to_key]:
                fields[from_key], fields[to_key] = fields[to_key], fields[from_key]
        if feature.geom.geom_name != 'LINESTRING':
            continue
        for suffix in alt_names_suff:
            name_fields = {}
            for esri_fieldname, block_fieldname in NAME_FIELD_MAP.items():
                key = esri_fieldname + suffix
                name_fields[block_fieldname] = feature.get(key).upper()
            if not name_fields['street']:
                continue
            # Skip blocks with bare number street names and no suffix / type
            if not name_fields['suffix'] and re.search(r'^\d+$', name_fields['street']):
                continue
            # ``fields`` is shared by all names of this feature; the
            # name-dependent entries are overwritten on every pass.
            fields.update(name_fields)
            # Ensure we have unicode.
            for key, val in fields.items():
                if isinstance(val, str):
                    fields[key] = val.decode(self.encoding)
            fields['street_pretty_name'], fields['pretty_name'] = make_pretty_name(
                fields['left_from_num'],
                fields['left_to_num'],
                fields['right_from_num'],
                fields['right_to_num'],
                fields['predir'],
                fields['street'],
                fields['suffix'],
                fields['postdir'],
            )
            fields['street_slug'] = slugify(u' '.join((fields['street'], fields['suffix'])))
            # Watch out for addresses like '247B' which can't be
            # saved as an IntegerField. But do this after making
            # pretty names.
            for addr_key in address_keys:
                number = fields[addr_key]
                # Only strings can carry a letter suffix. One side of
                # the street may legitimately be None (or the value may
                # be an int), and those have no rstrip().
                if isinstance(number, basestring):
                    fields[addr_key] = number.rstrip(string.letters)
            fields['from_num'], fields['to_num'] = make_block_numbers(
                fields['left_from_num'],
                fields['left_to_num'],
                fields['right_from_num'],
                fields['right_to_num'])
            block = Block(**fields)
            block.geom = feature.geom.geos
            self.log(u'Looking at block %s' % fields['street'])
            block.save()
            if parent_id is None:
                parent_id = block.id
            else:
                block.parent_id = parent_id
                block.save()
            num_created += 1
            self.log('Created block %s' % block)
    return num_created
def _makeBlock(self):
    """Create, save and return a Block whose geometry is LINESTRING."""
    new_block = Block(geom=LINESTRING)
    new_block.save()
    return new_block
def save(self):
    """
    Save a Block for every block generated from the layer's features.

    Features for which ``self.skip_feature()`` is true are ignored. Each
    dict yielded by ``self.gen_blocks()`` is decoded to unicode, given
    pretty names and a street slug, has its address numbers converted to
    integers (or None), and is then saved as a Block. When a feature
    yields more than one block, the first block saved becomes the parent
    of the others.

    Returns the number of blocks saved.
    """
    number_keys = ('left_from_num', 'left_to_num',
                   'right_from_num', 'right_to_num')
    pretty_keys = number_keys + ('predir', 'street', 'suffix', 'postdir')
    created = 0
    for feature in self.layer:
        if self.skip_feature(feature):
            continue
        first_block_id = None
        # Usually (at least in Boston data) there is only 1 block per
        # feature. But sometimes there are multiple names for one
        # street, eg. "N. Commercial Wharf" and "Commercial Wharf N.";
        # in that case those would be yielded by gen_blocks() as
        # two separate blocks. Is that intentional, or a bug?
        for attrs in self.gen_blocks(feature):
            # Ensure we have unicode.
            for name, raw in attrs.items():
                if isinstance(raw, str):
                    attrs[name] = raw.decode(self.encoding)
            attrs['geom'] = feature.geom.geos
            pretty_names = make_pretty_name(*[attrs[key] for key in pretty_keys])
            attrs['street_pretty_name'], attrs['pretty_name'] = pretty_names
            slug_source = u' '.join((attrs['street'], attrs['suffix']))
            attrs['street_slug'] = slugify(slug_source)
            # Watch out for addresses like '247B' which can't be
            # saved as an IntegerField.
            # But do this *after* making pretty names.
            for addr_key in number_keys:
                raw = attrs[addr_key]
                number = None
                if isinstance(raw, basestring):
                    # Also attempt to fix up addresses like '19-47',
                    # by just using the lower number. This will give
                    # misleading output, but it's probably better
                    # than discarding blocks.
                    digits = raw.rstrip(string.letters).split('-')[0]
                    if digits:
                        try:
                            number = int(digits)
                        except ValueError:
                            self.log("Omitting weird value %r for %r" % (digits, addr_key))
                attrs[addr_key] = number
            block_numbers = make_block_numbers(*[attrs[key] for key in number_keys])
            attrs['from_num'], attrs['to_num'] = block_numbers
            new_block = Block(**attrs)
            new_block.save()
            if first_block_id is not None:
                # Not the first block of this feature: link it to the
                # first one, which needs a second save.
                new_block.parent_id = first_block_id
                new_block.save()
            else:
                first_block_id = new_block.id
            created += 1
            self.log('%d\tCreated block %s for feature %d' % (created, new_block, feature.get('TLID')))
    return created
def save(self):
    """
    Import the layer's features as Blocks, updating blocks that exist.

    If ``self.reset`` is true, every existing Block is deleted first.

    Features for which ``self.skip_feature()`` is true are ignored. Each
    dict yielded by ``self.gen_blocks()`` is decoded to unicode, given
    pretty names and a street slug, has its address numbers and geocoding
    fields standardized, and is then matched against existing Blocks on
    its identifying fields:

    * no match: a new Block is created;
    * one match: that Block is overwritten with the new values;
    * several matches: the block is skipped with a warning.

    Blocks whose address range cannot be computed, or which fail
    validation twice, are skipped with a warning. When a feature yields
    more than one block, the first block saved becomes the parent of the
    others.

    Returns the number of new blocks that were actually saved.
    """
    # Imported inside the function as in the original code (presumably to
    # avoid an import cycle -- TODO confirm), but only once rather than
    # on every loop iteration.
    from ebpub.geocoder.parser.parsing import STANDARDIZERS
    from ebpub.geocoder.parser.parsing import number_standardizer

    if self.reset:
        logger.warn("Deleting all Block instances and anything that refers to them!")
        Block.objects.all().delete()

    address_keys = ('left_from_num', 'left_to_num',
                    'right_from_num', 'right_to_num')
    primary_field_keys = ('street_slug', 'from_num', 'to_num',
                          'left_city', 'right_city',
                          'left_zip', 'right_zip',
                          'left_state', 'right_state',
                          )
    num_created = 0
    for feature in self.layer:
        if self.skip_feature(feature):
            continue
        parent_id = None
        # Usually (at least in Boston data) there is only 1 block per
        # feature. But sometimes there are multiple names for one
        # street, eg. "N. Commercial Wharf" and "Commercial Wharf N.";
        # in that case those would be yielded by gen_blocks() as
        # two separate blocks. Is that intentional, or a bug?
        for block_fields in self.gen_blocks(feature):
            # Ensure we have unicode.
            for key, val in block_fields.items():
                if isinstance(val, str):
                    block_fields[key] = val.decode(self.encoding)

            block_fields['geom'] = geos_with_projection(feature.geom, 4326)
            block_fields['prefix'] = make_pretty_prefix(block_fields['prefix'])

            block_fields['street_pretty_name'], block_fields['pretty_name'] = make_pretty_name(
                block_fields['left_from_num'],
                block_fields['left_to_num'],
                block_fields['right_from_num'],
                block_fields['right_to_num'],
                block_fields['predir'],
                block_fields['prefix'],
                block_fields['street'],
                block_fields['suffix'],
                block_fields['postdir']
            )

            block_fields['street_slug'] = slugify(
                u' '.join((block_fields['prefix'],
                           block_fields['street'],
                           block_fields['suffix'])))

            # Watch out for addresses like '247B' which can't be
            # saved as an IntegerField.
            # But do this *after* making pretty names.
            # Also attempt to fix up addresses like '19-47',
            # by just using the lower number. This will give
            # misleading output, but it's probably better than
            # discarding blocks.
            for addr_key in address_keys:
                raw = block_fields[addr_key]
                value = None
                if isinstance(raw, basestring):
                    # An empty result is stored as None.
                    value = number_standardizer(raw.strip()) or None
                block_fields[addr_key] = value

            try:
                block_fields['from_num'], block_fields['to_num'] = \
                    make_block_numbers(block_fields['left_from_num'],
                                       block_fields['left_to_num'],
                                       block_fields['right_from_num'],
                                       block_fields['right_to_num'])
            except ValueError as e:
                logger.warn('Skipping %s: %s' % (block_fields['pretty_name'], e))
                continue

            # After doing pretty names etc, standardize the fields
            # that get used for geocoding, since the geocoder
            # searches for the standardized version.
            for key, standardizer in STANDARDIZERS.items():
                if key not in block_fields:
                    continue
                if key == 'street' and block_fields['prefix']:
                    # Special case: "US Highway 101", not "US Highway 101st".
                    continue
                block_fields[key] = standardizer(block_fields[key])

            # Separate out the uniquely identifying fields so
            # we can avoid duplicate blocks.
            # NOTE this doesn't work if you're updating from a more
            # recent shapefile and the street has significant
            # changes - eg. the street name has changed, or the
            # address range has changed, or the block has split...
            # see #257. http://developer.openblockproject.org/ticket/257
            primary_fields = {}
            for key in primary_field_keys:
                if block_fields[key] != u'':
                    # Some empty fields are fixed
                    # automatically by clean().
                    primary_fields[key] = block_fields[key]

            existing = list(Block.objects.filter(**primary_fields))
            if not existing:
                # Check the old-style way we used to make street slugs
                # prior to fixing issue #264... we need to keep this
                # code around indefinitely in case we are reloading the
                # blocks data and need to overwrite blocks that have
                # the old bad slug. Sadly this probably can't just be
                # fixed by a migration.
                _old_street_slug = slugify(
                    u' '.join((block_fields['street'],
                               block_fields['suffix'])))
                _old_primary_fields = primary_fields.copy()
                _old_primary_fields['street_slug'] = _old_street_slug
                existing = list(Block.objects.filter(**_old_primary_fields))

            if len(existing) > 1:
                logger.warn("Multiple existing blocks like %s, skipping" % existing[0])
                continue

            is_new = not existing
            if is_new:
                block = Block(**block_fields)
                logger.debug("CREATING %s" % unicode(block))
            else:
                block = existing[0]
                logger.debug(u"Block %s already exists" % unicode(block))
                for key, val in block_fields.items():
                    setattr(block, key, val)

            try:
                block.full_clean()
            except ValidationError:
                # odd bug: sometimes we get ValidationError even when
                # the data looks good, and then cleaning again works???
                try:
                    block.full_clean()
                except ValidationError as e:
                    logger.warn("validation error on %s, skipping" % str(block))
                    logger.warn(e)
                    continue
            block.save()
            # Count a new block only once it has really been saved, so
            # blocks skipped for validation errors are not counted.
            if is_new:
                num_created += 1
            if parent_id is None:
                parent_id = block.id
            else:
                block.parent_id = parent_id
                block.save()
            logger.debug('%d\tCreated block %s for feature %d'
                         % (num_created, block, feature.fid))
    return num_created
def save(self, verbose=False):
    """
    Create Blocks for each usable LINESTRING feature in ``self.layer``.

    A feature is skipped when its 'FCC' value does not match
    ``self.fcc_pat``, when neither side of the street has both a "from"
    and a "to" address number, or when its geometry is not a LINESTRING.

    One block is saved per street name found on the feature: the name
    fields are read once with no suffix and once for each of the
    suffixes '1' to '5'. Names that are empty, or that are a bare number
    with no street suffix, are skipped. The first block saved for a
    feature becomes the parent of the others.

    If ``verbose`` is true, a line is written to stderr per block saved.

    Returns the number of blocks saved.
    """
    alt_names_suff = ('', '1', '2', '3', '4', '5')
    created = 0
    for feature in self.layer:
        if not self.fcc_pat.search(feature.get('FCC')):
            continue
        first_block_id = None
        fields = {}
        for esri_name, block_name in FIELD_MAP.items():
            raw = feature.get(esri_name)
            if isinstance(raw, basestring):
                raw = raw.upper()
            elif isinstance(raw, int) and raw == 0:
                raw = None
            fields[block_name] = raw
        # At least one side of the street needs a complete range.
        left_complete = fields['left_from_num'] and fields['left_to_num']
        right_complete = fields['right_from_num'] and fields['right_to_num']
        if not left_complete and not right_complete:
            continue
        # Sometimes the "from" number is greater than the "to"
        # number in the source data, so we swap them into proper
        # ordering
        for side in ('left', 'right'):
            low_key = '%s_from_num' % side
            high_key = '%s_to_num' % side
            if fields[low_key] > fields[high_key]:
                fields[low_key], fields[high_key] = fields[high_key], fields[low_key]
        if feature.geom.geom_name != 'LINESTRING':
            continue
        for suffix in alt_names_suff:
            name_fields = dict(
                (block_name, feature.get(esri_name + suffix).upper())
                for esri_name, block_name in NAME_FIELD_MAP.items())
            street = name_fields['street']
            if not street:
                continue
            # Skip blocks with bare number street names and no suffix / type
            if not name_fields['suffix'] and re.search('^\d+$', street):
                continue
            fields.update(name_fields)
            new_block = Block(**fields)
            new_block.geom = feature.geom.geos
            street_pretty, block_pretty = make_pretty_name(
                fields['left_from_num'],
                fields['left_to_num'],
                fields['right_from_num'],
                fields['right_to_num'],
                fields['predir'],
                fields['street'],
                fields['suffix'],
                fields['postdir']
            )
            new_block.pretty_name = block_pretty
            new_block.street_pretty_name = street_pretty
            slug_source = ' '.join((fields['street'], fields['suffix']))
            new_block.street_slug = slugify(slug_source)
            new_block.save()
            if first_block_id is not None:
                # Not the first block of this feature: link it to the
                # first one, which needs a second save.
                new_block.parent_id = first_block_id
                new_block.save()
            else:
                first_block_id = new_block.id
            created += 1
            if verbose:
                print >> sys.stderr, 'Created block %s' % new_block
    return created
def save(self):
    """
    Import the layer's features as Blocks, updating blocks that exist.

    Features for which ``self.skip_feature()`` is true are ignored. Each
    dict yielded by ``self.gen_blocks()`` is decoded to unicode, given
    pretty names and a street slug, has its address numbers converted to
    integers (or None), and is then matched against existing Blocks on
    its identifying fields:

    * no match: a new Block is created;
    * one match: that Block is overwritten with the new values;
    * several matches: the block is skipped with a warning.

    Blocks whose address range cannot be computed, or which fail
    validation twice, are skipped with a warning. When a feature yields
    more than one block, the first block saved becomes the parent of the
    others.

    Returns the number of new blocks that were actually saved.
    """
    address_keys = ('left_from_num', 'left_to_num',
                    'right_from_num', 'right_to_num')
    primary_field_keys = ('street_slug', 'from_num', 'to_num',
                          'left_city', 'right_city',
                          'left_zip', 'right_zip',
                          'left_state', 'right_state',
                          )
    num_created = 0
    for feature in self.layer:
        if self.skip_feature(feature):
            continue
        parent_id = None
        # Usually (at least in Boston data) there is only 1 block per
        # feature. But sometimes there are multiple names for one
        # street, eg. "N. Commercial Wharf" and "Commercial Wharf N.";
        # in that case those would be yielded by gen_blocks() as
        # two separate blocks. Is that intentional, or a bug?
        for block_fields in self.gen_blocks(feature):
            # Ensure we have unicode.
            for key, val in block_fields.items():
                if isinstance(val, str):
                    block_fields[key] = val.decode(self.encoding)

            block_fields['geom'] = feature.geom.geos

            block_fields['street_pretty_name'], block_fields['pretty_name'] = make_pretty_name(
                block_fields['left_from_num'],
                block_fields['left_to_num'],
                block_fields['right_from_num'],
                block_fields['right_to_num'],
                block_fields['predir'],
                block_fields['street'],
                block_fields['suffix'],
                block_fields['postdir']
            )

            block_fields['street_slug'] = slugify(
                u' '.join((block_fields['street'], block_fields['suffix'])))

            # Watch out for addresses like '247B' which can't be
            # saved as an IntegerField.
            # But do this *after* making pretty names.
            for addr_key in address_keys:
                raw = block_fields[addr_key]
                value = None
                if isinstance(raw, basestring):
                    # Also attempt to fix up addresses like
                    # '19-47', by just using the lower number.
                    # This will give misleading output, but
                    # it's probably better than discarding blocks.
                    digits = raw.rstrip(string.letters).split('-')[0]
                    if digits:
                        try:
                            value = int(digits)
                        except ValueError:
                            logger.warn("Omitting weird value %r for %r" % (digits, addr_key))
                block_fields[addr_key] = value

            try:
                block_fields['from_num'], block_fields['to_num'] = \
                    make_block_numbers(block_fields['left_from_num'],
                                       block_fields['left_to_num'],
                                       block_fields['right_from_num'],
                                       block_fields['right_to_num'])
            except ValueError as e:
                logger.warn('Skipping %s: %s' % (block_fields['pretty_name'], e))
                continue

            # Separate out the uniquely identifying fields so
            # we can avoid duplicate blocks.
            # NOTE this doesn't work if you're updating from a more
            # recent shapefile and the street has significant
            # changes - eg. the street name has changed, or the
            # address range has changed, or the block has split...
            primary_fields = {}
            for key in primary_field_keys:
                if block_fields[key] != u'':
                    # Some empty fields are fixed
                    # automatically by clean(), so leave them out.
                    primary_fields[key] = block_fields[key]

            existing = list(Block.objects.filter(**primary_fields))
            if len(existing) > 1:
                logger.warn("Multiple existing blocks like %s, skipping" % existing[0])
                continue

            is_new = not existing
            if is_new:
                block = Block(**block_fields)
            else:
                block = existing[0]
                logger.debug(u"Block %s already exists" % unicode(block))
                for key, val in block_fields.items():
                    setattr(block, key, val)

            try:
                block.full_clean()
            except ValidationError:
                # odd bug: sometimes we get ValidationError even when
                # the data looks good, and then cleaning again works???
                try:
                    block.full_clean()
                except ValidationError as e:
                    logger.warn("validation error on %s, skipping" % str(block))
                    logger.warn(e)
                    continue
            logger.debug("CREATING %s" % unicode(block))
            block.save()
            # Count a new block only once it has really been saved, so
            # blocks skipped for validation errors are not counted.
            if is_new:
                num_created += 1
            if parent_id is None:
                parent_id = block.id
            else:
                block.parent_id = parent_id
                block.save()
            logger.debug('%d\tCreated block %s for feature %d'
                         % (num_created, block, feature.get('TLID')))
    return num_created
def save(self):
    """
    Save a validated Block for every block generated from the layer.

    Features for which ``self.skip_feature()`` is true are ignored. Each
    dict yielded by ``self.gen_blocks()`` is decoded to unicode, given
    pretty names and a street slug, has its address numbers converted to
    integers (or None), and is then validated and saved as a Block.
    Blocks whose address range cannot be computed, or which fail
    validation twice, are skipped with a warning. When a feature yields
    more than one block, the first block saved becomes the parent of the
    others.

    Nothing is returned; progress is reported through ``logger``.
    """
    number_keys = ('left_from_num', 'left_to_num',
                   'right_from_num', 'right_to_num')
    pretty_keys = number_keys + ('predir', 'street', 'suffix', 'postdir')
    created = 0
    for feature in self.layer:
        if self.skip_feature(feature):
            continue
        first_block_id = None
        # Usually (at least in Boston data) there is only 1 block per
        # feature. But sometimes there are multiple names for one
        # street, eg. "N. Commercial Wharf" and "Commercial Wharf N.";
        # in that case those would be yielded by gen_blocks() as
        # two separate blocks. Is that intentional, or a bug?
        for attrs in self.gen_blocks(feature):
            # Ensure we have unicode.
            for name, raw in attrs.items():
                if isinstance(raw, str):
                    attrs[name] = raw.decode(self.encoding)
            attrs['geom'] = feature.geom.geos
            pretty_names = make_pretty_name(*[attrs[key] for key in pretty_keys])
            attrs['street_pretty_name'], attrs['pretty_name'] = pretty_names
            slug_source = u' '.join((attrs['street'], attrs['suffix']))
            attrs['street_slug'] = slugify(slug_source)
            # Watch out for addresses like '247B' which can't be
            # saved as an IntegerField.
            # But do this *after* making pretty names.
            for addr_key in number_keys:
                raw = attrs[addr_key]
                number = None
                if isinstance(raw, basestring):
                    # Also attempt to fix up addresses like '19-47',
                    # by just using the lower number. This will give
                    # misleading output, but it's probably better
                    # than discarding blocks.
                    digits = raw.rstrip(string.letters).split('-')[0]
                    if digits:
                        try:
                            number = int(digits)
                        except ValueError:
                            logger.warn("Omitting weird value %r for %r" % (digits, addr_key))
                attrs[addr_key] = number
            try:
                block_numbers = make_block_numbers(*[attrs[key] for key in number_keys])
                attrs['from_num'], attrs['to_num'] = block_numbers
            except ValueError as e:
                logger.warn('Skipping %s: %s' % (attrs['pretty_name'], e))
                continue
            new_block = Block(**attrs)
            try:
                new_block.full_clean()
            except ValidationError:
                # odd bug: sometimes we get ValidationError even when
                # the data looks good, and then cleaning again works???
                try:
                    new_block.full_clean()
                except ValidationError as e:
                    logger.warn("validation error on %s, skipping" % str(new_block))
                    logger.warn(e)
                    continue
            new_block.save()
            if first_block_id is not None:
                # Not the first block of this feature: link it to the
                # first one, which needs a second save.
                new_block.parent_id = first_block_id
                new_block.save()
            else:
                first_block_id = new_block.id
            created += 1
            logger.debug('%d\tCreated block %s for feature %d' % (created, new_block, feature.get('TLID')))
def save(self):
    """
    Import the layer's features as Blocks, updating blocks that exist.

    If ``self.reset`` is true, every existing Block is deleted first.

    Features for which ``self.skip_feature()`` is true are ignored. Each
    dict yielded by ``self.gen_blocks()`` is decoded to unicode, given
    pretty names and a street slug, has its address numbers and geocoding
    fields standardized, and is then matched against existing Blocks on
    its identifying fields:

    * no match: a new Block is created;
    * one match: that Block is overwritten with the new values;
    * several matches: the block is skipped with a warning.

    Blocks whose address range cannot be computed, or which fail
    validation twice, are skipped with a warning. When a feature yields
    more than one block, the first block saved becomes the parent of the
    others.
    """
    # Imported inside the function as in the original code (presumably to
    # avoid an import cycle -- TODO confirm), but only once rather than
    # on every loop iteration.
    from ebpub.geocoder.parser.parsing import STANDARDIZERS
    from ebpub.geocoder.parser.parsing import number_standardizer

    if self.reset:
        logger.warn(
            "Deleting all Block instances and anything that refers to them!"
        )
        Block.objects.all().delete()
    import time
    start = time.time()
    address_keys = ('left_from_num', 'left_to_num',
                    'right_from_num', 'right_to_num')
    primary_field_keys = (
        'street_slug',
        'from_num',
        'to_num',
        'left_city',
        'right_city',
        'left_zip',
        'right_zip',
        'left_state',
        'right_state',
    )
    num_created = 0
    num_existing = 0
    for feature in self.layer:
        if self.skip_feature(feature):
            continue
        parent_id = None
        # Usually (at least in Boston data) there is only 1 block per
        # feature. But sometimes there are multiple names for one
        # street, eg. "N. Commercial Wharf" and "Commercial Wharf N.";
        # in that case those would be yielded by gen_blocks() as
        # two separate blocks. Is that intentional, or a bug?
        for block_fields in self.gen_blocks(feature):
            # Ensure we have unicode.
            for key, val in block_fields.items():
                if isinstance(val, str):
                    block_fields[key] = val.decode(self.encoding)

            block_fields['geom'] = geos_with_projection(feature.geom, 4326)
            block_fields['prefix'] = make_pretty_prefix(block_fields['prefix'])

            block_fields['street_pretty_name'], block_fields['pretty_name'] = make_pretty_name(
                block_fields['left_from_num'],
                block_fields['left_to_num'],
                block_fields['right_from_num'],
                block_fields['right_to_num'],
                block_fields['predir'],
                block_fields['prefix'],
                block_fields['street'],
                block_fields['suffix'],
                block_fields['postdir'])

            block_fields['street_slug'] = slugify(u' '.join(
                (block_fields['prefix'], block_fields['street'],
                 block_fields['suffix'])))

            # Watch out for addresses like '247B' which can't be
            # saved as an IntegerField.
            # But do this *after* making pretty names.
            # Also attempt to fix up addresses like '19-47',
            # by just using the lower number. This will give
            # misleading output, but it's probably better than
            # discarding blocks.
            for addr_key in address_keys:
                # Always start from this field's own value, so a value
                # left over from the previous key (or no value at all)
                # can never be stored for a non-string field.
                candidate = block_fields[addr_key]
                if isinstance(candidate, basestring):
                    # An empty result counts as "no number".
                    candidate = number_standardizer(candidate.strip()) or None
                value = None
                if candidate is not None:
                    # Keep only what parses as an integer, stored in
                    # its canonical string form.
                    try:
                        value = str(int(candidate))
                    except (ValueError, TypeError):
                        value = None
                block_fields[addr_key] = value

            try:
                block_fields['from_num'], block_fields['to_num'] = \
                    make_block_numbers(block_fields['left_from_num'],
                                       block_fields['left_to_num'],
                                       block_fields['right_from_num'],
                                       block_fields['right_to_num'])
            except ValueError as e:
                logger.warn('Skipping %s: %s' % (block_fields['pretty_name'], e))
                continue

            # After doing pretty names etc, standardize the fields
            # that get used for geocoding, since the geocoder
            # searches for the standardized version.
            for key, standardizer in STANDARDIZERS.items():
                if key not in block_fields:
                    continue
                if key == 'street' and block_fields['prefix']:
                    # Special case: "US Highway 101", not "US Highway 101st".
                    continue
                block_fields[key] = standardizer(block_fields[key])

            # Separate out the uniquely identifying fields so
            # we can avoid duplicate blocks.
            # NOTE this doesn't work if you're updating from a more
            # recent shapefile and the street has significant
            # changes - eg. the street name has changed, or the
            # address range has changed, or the block has split...
            # see #257. http://developer.openblockproject.org/ticket/257
            primary_fields = {}
            for key in primary_field_keys:
                if block_fields[key] != u'':
                    # Some empty fields are fixed
                    # automatically by clean().
                    primary_fields[key] = block_fields[key]

            existing = list(Block.objects.filter(**primary_fields))
            if not existing:
                # Check the old-style way we used to make street slugs
                # prior to fixing issue #264... we need to keep this
                # code around indefinitely in case we are reloading the
                # blocks data and need to overwrite blocks that have
                # the old bad slug. Sadly this probably can't just be
                # fixed by a migration.
                _old_street_slug = slugify(u' '.join(
                    (block_fields['street'], block_fields['suffix'])))
                _old_primary_fields = primary_fields.copy()
                _old_primary_fields['street_slug'] = _old_street_slug
                existing = list(
                    Block.objects.filter(**_old_primary_fields))

            if len(existing) > 1:
                num_existing += len(existing)
                logger.warn(
                    "Multiple existing blocks like %s, skipping" % existing[0])
                continue

            is_new = not existing
            if is_new:
                block = Block(**block_fields)
                logger.debug("CREATING %s" % unicode(block))
            else:
                num_existing += 1
                block = existing[0]
                logger.debug(u"Block %s already exists" % unicode(block))
                for key, val in block_fields.items():
                    setattr(block, key, val)

            try:
                block.full_clean()
            except ValidationError:
                # odd bug: sometimes we get ValidationError even when
                # the data looks good, and then cleaning again works???
                try:
                    block.full_clean()
                except ValidationError as e:
                    logger.warn("validation error on %s, skipping" % str(block))
                    logger.warn(e)
                    continue
            block.save()
            # Count a new block only once it has really been saved, so
            # blocks skipped for validation errors are not counted.
            if is_new:
                num_created += 1
            if parent_id is None:
                parent_id = block.id
            else:
                block.parent_id = parent_id
                block.save()
            logger.debug('%d\tCreated block %s for feature %d' %
                         (num_created, block, feature.fid))