def handle_label(self, filename, **options):
    current_generation = Generation.objects.current()
    new_generation = Generation.objects.new()
    if not new_generation:
        raise Exception("No new generation to be used for import!")

    print(filename)
    # Need to parse the KML manually to get the ExtendedData
    kml_data = KML()
    xml.sax.parse(filename, kml_data)

    code_type_osm = CodeType.objects.get(code='osm')
    code_type_n5000 = CodeType.objects.get(code='n5000')

    ds = DataSource(filename)
    layer = ds[0]
    for feat in layer:
        name = feat['Name'].value
        if not isinstance(name, six.text_type):
            name = name.decode('utf-8')
        name = re.sub(r'\s+', ' ', name)
        print(" %s" % smart_str(name))

        code = int(kml_data.data[name]['ref'])
        if code == 301:
            # Oslo ref in OSM could be either 3 (fylke) or 301 (kommune). Make sure it's 3.
            code = 3
        if code < 100:  # Not particularly nice, but fine
            area_code = 'NFY'
            parent_area = None
            code_str = '%02d' % code
        else:
            area_code = 'NKO'
            code_str = '%04d' % code
            parent_area = Area.objects.get(id=int(code_str[0:2]))

        def update_or_create():
            try:
                m = Area.objects.get(id=code)
            except Area.DoesNotExist:
                m = Area(
                    id=code,
                    name=name,
                    type=Type.objects.get(code=area_code),
                    country=Country.objects.get(code='O'),
                    parent_area=parent_area,
                    generation_low=new_generation,
                    generation_high=new_generation,
                )

            if m.generation_high and current_generation and m.generation_high.id < current_generation.id:
                raise Exception(
                    "Area %s found, but not in current generation %s" % (m, current_generation))
            m.generation_high = new_generation

            g = feat.geom.transform(4326, clone=True)
            poly = [g]

            if options['commit']:
                m.save()
                for k, v in kml_data.data[name].items():
                    if k in ('name:smi', 'name:fi'):
                        lang = 'N' + k[5:]
                        m.names.update_or_create(
                            type=NameType.objects.get(code=lang), defaults={'name': v})
                m.codes.update_or_create(type=code_type_n5000, defaults={'code': code_str})
                m.codes.update_or_create(
                    type=code_type_osm, defaults={'code': int(kml_data.data[name]['osm'])})
                save_polygons({code: (m, poly)})

        update_or_create()
        # Special case Oslo so it's in twice, once as fylke, once as kommune
        if code == 3:
            code, area_code, parent_area, code_str = 301, 'NKO', Area.objects.get(id=3), '0301'
            update_or_create()
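# Note on shared structure: these import commands all rely on a project-specific
# KML SAX handler exposing a `data` attribute that maps each Placemark name to
# its ExtendedData key/value pairs. The handler itself is not shown here; the
# sketch below is only an assumed illustration of the shape that lookups such as
# kml_data.data[name]['ref'] expect (the keys come from the code above, but the
# values are hypothetical, not real data):
#
#   kml_data.data == {
#       'Oslo': {
#           'ref': '0301',        # Norwegian county/municipality number
#           'osm': '406091',      # OSM element ID (hypothetical value)
#           'name:smi': 'Oslo',   # translated names keyed as name:<lang>
#       },
#   }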
def handle_label(self, directory_name, **options):
    current_generation = Generation.objects.current()

    if not os.path.isdir(directory_name):
        raise Exception("'%s' is not a directory" % (directory_name,))
    os.chdir(directory_name)

    skip_up_to = None
    # skip_up_to = 'relation-80370'
    skipping = bool(skip_up_to)

    osm_elements_seen_in_new_data = set()

    with open("/home/mark/difference-results.csv", 'w') as fp:
        csv_writer = csv.writer(fp)
        csv_writer.writerow([
            "ElementType",
            "ElementID",
            "ExistedPreviously",
            "PreviousEmpty",
            "PreviousArea",
            "NewEmpty",
            "NewArea",
            "SymmetricDifferenceArea",
            "GEOSEquals",
            "GEOSEqualsExact"])

        for admin_directory in sorted(x for x in os.listdir('.') if os.path.isdir(x)):
            if not re.search('^[A-Z0-9]{3}$', admin_directory):
                print("Skipping a directory that doesn't look like a MapIt type:", admin_directory)
                continue
            if not os.path.exists(admin_directory):
                continue

            files = sorted(os.listdir(admin_directory))
            total_files = len(files)

            for i, e in enumerate(files):
                progress = "[%d%% complete] " % ((i * 100) / total_files,)

                if skipping:
                    if skip_up_to in e:
                        skipping = False
                    else:
                        continue

                if not e.endswith('.kml'):
                    continue

                m = re.search(r'^(way|relation)-(\d+)-', e)
                if not m:
                    raise Exception("Couldn't extract OSM element type and ID from: " + e)
                osm_type, osm_id = m.groups()
                osm_elements_seen_in_new_data.add((osm_type, osm_id))

                kml_filename = os.path.join(admin_directory, e)

                # Need to parse the KML manually to get the ExtendedData
                kml_data = KML()
                print("parsing", kml_filename)
                xml.sax.parse(kml_filename, kml_data)

                useful_names = [n for n in kml_data.data.keys() if not n.startswith('Boundaries for')]
                if len(useful_names) == 0:
                    raise Exception("No useful names found in KML data")
                elif len(useful_names) > 1:
                    raise Exception("Multiple useful names found in KML data")
                name = useful_names[0]
                print(" ", name)

                if osm_type == 'relation':
                    code_type_osm = CodeType.objects.get(code='osm_rel')
                elif osm_type == 'way':
                    code_type_osm = CodeType.objects.get(code='osm_way')
                else:
                    raise Exception("Unknown OSM element type: " + osm_type)

                ds = DataSource(kml_filename)
                if len(ds) != 1:
                    raise Exception("We only expect one layer in a DataSource")
                layer = ds[0]
                if len(layer) != 1:
                    raise Exception("We only expect one feature in each layer")
                feat = layer[0]

                area_code = admin_directory

                osm_codes = list(Code.objects.filter(type=code_type_osm, code=osm_id))
                osm_codes.sort(key=lambda e: e.area.generation_high.created)

                new_area = None
                new_valid = None
                new_empty = None
                previous_area = None
                previous_valid = None
                previous_empty = None
                symmetric_difference_area = None

                g = feat.geom.transform(4326, clone=True)

                new_some_nonempty = False
                for polygon in g:
                    if polygon.point_count < 4:
                        new_empty = True
                    else:
                        new_some_nonempty = True
                if not new_empty:
                    new_geos_geometry = g.geos.simplify(tolerance=0)
                    new_area = new_geos_geometry.area
                    new_empty = new_geos_geometry.empty

                geos_equals = None
                geos_equals_exact = None

                most_recent_osm_code = None
                if osm_codes:
                    most_recent_osm_code = osm_codes[-1]
                    previous_geos_geometry = most_recent_osm_code.area.polygons.collect()
                    previous_empty = previous_geos_geometry is None
                    if not previous_empty:
                        previous_geos_geometry = previous_geos_geometry.simplify(tolerance=0)
                        previous_area = previous_geos_geometry.area
                        if not new_empty:
                            symmetric_difference_area = previous_geos_geometry.sym_difference(
                                new_geos_geometry).area
                            geos_equals = previous_geos_geometry.equals(new_geos_geometry)
                            geos_equals_exact = previous_geos_geometry.equals_exact(new_geos_geometry)

                csv_writer.writerow([
                    osm_type,
                    osm_id,
                    bool(osm_codes),  # ExistedPreviously
                    empty_if_none(previous_empty),
                    empty_if_none(previous_area),
                    empty_if_none(new_empty),
                    empty_if_none(new_area),
                    empty_if_none(symmetric_difference_area),
                    empty_if_none(geos_equals),
                    empty_if_none(geos_equals_exact)])
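# empty_if_none() is used in the CSV rows above but defined elsewhere in the
# module; this is a minimal sketch under the assumption that it simply turns
# None into an empty CSV cell and passes every other value through unchanged:


def empty_if_none(value):
    # Assumed helper: None becomes '', anything else is written as-is.
    return '' if value is None else value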
def handle_label(self, directory_name, **options):
    current_generation = Generation.objects.current()
    new_generation = Generation.objects.new()
    if not new_generation:
        raise Exception("No new generation to be used for import!")

    if not os.path.isdir(directory_name):
        raise Exception("'%s' is not a directory" % (directory_name,))
    os.chdir(directory_name)

    mapit_type_glob = smart_text("[A-Z0-9][A-Z0-9][A-Z0-9]")
    if not glob(mapit_type_glob):
        raise Exception(
            "'%s' did not contain any directories that look like MapIt types (e.g. O11, OWA, etc.)" % (
                directory_name,))

    def verbose(s):
        if int(options['verbosity']) > 1:
            print(smart_str(s))

    verbose("Loading any admin boundaries from " + directory_name)

    verbose("Finding language codes...")
    language_code_to_name = {}
    code_keys = ('two_letter', 'three_letter')
    for row in get_iso639_2_table():
        english_name = getattr(row, 'english_name')
        for k in code_keys:
            code = getattr(row, k)
            if not code:
                continue
            language_code_to_name[code] = english_name

    global_country = Country.objects.get(code='G')

    # print json.dumps(language_code_to_name, sort_keys=True, indent=4)

    skip_up_to = None
    # skip_up_to = 'relation-80370'
    skipping = bool(skip_up_to)

    for type_directory in sorted(glob(mapit_type_glob)):

        verbose("Loading type " + type_directory)

        if not os.path.exists(type_directory):
            verbose("Skipping the non-existent " + type_directory)
            continue

        verbose("Loading all KML in " + type_directory)

        files = sorted(os.listdir(type_directory))
        total_files = len(files)

        for i, e in enumerate(files):
            progress = "[%d%% complete] " % ((i * 100) / total_files,)

            if skipping:
                if skip_up_to in e:
                    skipping = False
                else:
                    continue

            if not e.endswith('.kml'):
                verbose("Ignoring non-KML file: " + e)
                continue

            m = re.search(r'^(way|relation)-(\d+)-', e)
            if not m:
                raise Exception("Couldn't extract OSM element type and ID from: " + e)
            osm_type, osm_id = m.groups()

            kml_filename = os.path.join(type_directory, e)

            verbose(progress + "Loading " + os.path.realpath(kml_filename))

            # Need to parse the KML manually to get the ExtendedData
            kml_data = KML()
            xml.sax.parse(smart_str(kml_filename), kml_data)

            useful_names = [n for n in kml_data.data.keys() if not n.startswith('Boundaries for')]
            if len(useful_names) == 0:
                raise Exception("No useful names found in KML data")
            elif len(useful_names) > 1:
                raise Exception("Multiple useful names found in KML data")
            name = useful_names[0]
            print(smart_str(" %s" % name))

            if osm_type == 'relation':
                code_type_osm = CodeType.objects.get(code='osm_rel')
            elif osm_type == 'way':
                code_type_osm = CodeType.objects.get(code='osm_way')
            else:
                raise Exception("Unknown OSM element type: " + osm_type)

            ds = DataSource(kml_filename)
            layer = ds[0]
            if len(layer) != 1:
                raise Exception("We only expect one feature in each layer")
            feat = layer[1]

            g = feat.geom.transform(4326, clone=True)

            if g.geom_count == 0:
                # Just ignore any KML files that have no polygons in them:
                verbose('  Ignoring that file - it contained no polygons')
                continue

            # Nowadays, in generating the data we should have
            # excluded any "polygons" with less than four points
            # (the final one being the same as the first), but
            # just in case:
            polygons_too_small = 0
            for polygon in g:
                if polygon.num_points < 4:
                    polygons_too_small += 1
            if polygons_too_small:
                message = "%d out of %d polygon(s) were too small" % (polygons_too_small, g.geom_count)
                verbose('  Skipping, since ' + message)
                continue

            g_geos = g.geos

            if not g_geos.valid:
                verbose("  Invalid KML: " + kml_filename)
                fixed_multipolygon = fix_invalid_geos_multipolygon(g_geos)
                if len(fixed_multipolygon) == 0:
                    verbose("    Invalid polygons couldn't be fixed")
                    continue
                g = fixed_multipolygon.ogr

            area_type = Type.objects.get(code=type_directory)

            try:
                osm_code = Code.objects.get(
                    type=code_type_osm,
                    code=osm_id,
                    area__generation_high__lte=current_generation,
                    area__generation_high__gte=current_generation)
            except Code.DoesNotExist:
                verbose('  No area existed in the current generation with that OSM element type and ID')
                osm_code = None

            was_the_same_in_current = False

            if osm_code:
                m = osm_code.area

                # First, we need to check if the polygons are
                # still the same as in the previous generation:
                previous_geos_geometry = m.polygons.aggregate(Collect('polygon'))['polygon__collect']
                if previous_geos_geometry is None:
                    verbose('  In the current generation, that area was empty - skipping')
                else:
                    # Simplify it to make sure the polygons are valid:
                    previous_geos_geometry = shapely.wkb.loads(
                        bytes(previous_geos_geometry.simplify(tolerance=0).ewkb))
                    new_geos_geometry = shapely.wkb.loads(bytes(g.geos.simplify(tolerance=0).ewkb))
                    if previous_geos_geometry.almost_equals(new_geos_geometry, decimal=7):
                        was_the_same_in_current = True
                    else:
                        verbose('  In the current generation, the boundary was different')

            if was_the_same_in_current:
                # Extend the high generation to the new one:
                verbose('  The boundary was identical in the previous generation; raising generation_high')
                m.generation_high = new_generation
            else:
                # Otherwise, create a completely new area:
                m = Area(
                    name=name,
                    type=area_type,
                    country=global_country,
                    parent_area=None,
                    generation_low=new_generation,
                    generation_high=new_generation,
                )

            poly = [g]

            if options['commit']:
                m.save()
                verbose('  Area ID: ' + str(m.id))

                if name not in kml_data.data:
                    print(json.dumps(kml_data.data, sort_keys=True, indent=4))
                    raise Exception("Will fail to find '%s' in the dictionary" % (name,))

                old_lang_codes = set(n.type.code for n in m.names.all())

                for k, translated_name in kml_data.data[name].items():
                    language_name = None
                    if k == 'name':
                        lang = 'default'
                        language_name = "OSM Default"
                    else:
                        name_match = re.search(r'^name:(.+)$', k)
                        if name_match:
                            lang = name_match.group(1)
                            if lang in language_code_to_name:
                                language_name = language_code_to_name[lang]
                    if not language_name:
                        continue

                    old_lang_codes.discard(lang)

                    # Otherwise, make sure that a NameType for this language exists:
                    NameType.objects.update_or_create(code=lang, defaults={'description': language_name})
                    name_type = NameType.objects.get(code=lang)

                    m.names.update_or_create(type=name_type, defaults={'name': translated_name})

                if old_lang_codes:
                    verbose('Removing deleted language codes: ' + ' '.join(old_lang_codes))
                    m.names.filter(type__code__in=old_lang_codes).delete()

                # If the boundary was the same, the old Code
                # object will still be pointing to the same Area,
                # which just had its generation_high incremented.
                # In every other case, there's a new area object,
                # so create a new Code and save it:
                if not was_the_same_in_current:
                    new_code = Code(area=m, type=code_type_osm, code=osm_id)
                    new_code.save()

                save_polygons({'dummy': (m, poly)})
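# A small, self-contained illustration (not project code) of the shapely
# comparison used above: almost_equals(other, decimal=n) is an approximate,
# coordinate-wise equality check, so with decimal=7 two boundaries that differ
# only below roughly the 1e-7 level are treated as "the same in the current
# generation". The polygon coordinates here are made up for the demo.


def _almost_equals_demo():
    from shapely.geometry import Polygon

    a = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
    b = Polygon([(0, 0), (1, 0), (1, 1.00000001), (0, 1)])

    assert a.almost_equals(b, decimal=7)      # a 1e-8 difference is ignored
    assert not a.almost_equals(b, decimal=9)  # a stricter check spots the change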
def handle_label(self, directory_name, **options):
    current_generation = Generation.objects.current()
    new_generation = Generation.objects.new()
    if not new_generation:
        raise Exception("No new generation to be used for import!")

    if not os.path.isdir(directory_name):
        raise Exception("'%s' is not a directory" % (directory_name,))
    os.chdir(directory_name)

    if not glob("al[0-1][0-9]"):
        raise Exception(
            "'%s' did not contain any admin level directories (e.g. al02, al03, etc.)" % (directory_name,))

    def verbose(s):
        if options['verbose']:
            print(s)

    verbose("Loading any admin boundaries from " + directory_name)

    verbose("Finding language codes...")
    language_code_to_name = {}
    code_keys = ('two_letter', 'three_letter')
    for row in get_iso639_2_table():
        english_name = getattr(row, 'english_name')
        for k in code_keys:
            code = getattr(row, k)
            if not code:
                continue
            # Some of the language codes have a bibliographic or
            # terminology code, so strip those out:
            codes = re.findall(r'(\w+) \([BT]\)', code)
            if not codes:
                codes = [code]
            for c in codes:
                language_code_to_name[c] = english_name

    # print json.dumps(language_code_to_name, sort_keys=True, indent=4)

    skip_up_to = None
    # skip_up_to = 'relation-80370'
    skipping = bool(skip_up_to)

    for admin_level in range(2, 12):

        verbose("Loading admin_level " + str(admin_level))

        admin_directory = "al%02d" % (admin_level)
        if not os.path.exists(admin_directory):
            verbose("Skipping the non-existent " + admin_directory)
            continue

        verbose("Loading all KML in " + admin_directory)

        files = sorted(os.listdir(admin_directory))
        total_files = len(files)

        for i, e in enumerate(files):
            progress = "[%d%% complete] " % ((i * 100) / total_files,)

            if skipping:
                if skip_up_to in e:
                    skipping = False
                else:
                    continue

            if not e.endswith('.kml'):
                verbose("Ignoring non-KML file: " + e)
                continue

            m = re.search(r'^(way|relation)-(\d+)-', e)
            if not m:
                raise Exception("Couldn't extract OSM element type and ID from: " + e)
            osm_type, osm_id = m.groups()

            kml_filename = os.path.join(admin_directory, e)

            verbose(progress + "Loading " + os.path.realpath(kml_filename))

            # Need to parse the KML manually to get the ExtendedData
            kml_data = KML()
            xml.sax.parse(kml_filename, kml_data)

            useful_names = [n for n in kml_data.data.keys() if not n.startswith('Boundaries for')]
            if len(useful_names) == 0:
                raise Exception("No useful names found in KML data")
            elif len(useful_names) > 1:
                raise Exception("Multiple useful names found in KML data")
            name = useful_names[0]
            print(" ", name)

            if osm_type == 'relation':
                code_type_osm = CodeType.objects.get(code='osm_rel')
            elif osm_type == 'way':
                code_type_osm = CodeType.objects.get(code='osm_way')
            else:
                raise Exception("Unknown OSM element type: " + osm_type)

            ds = DataSource(kml_filename)
            layer = ds[0]
            if len(layer) != 1:
                raise Exception("We only expect one feature in each layer")

            for feat in layer:

                area_code = 'O%02d' % (admin_level)

                # FIXME: perhaps we could try to find parent areas
                # via inclusion in higher admin levels
                parent_area = None

                try:
                    osm_code = Code.objects.get(type=code_type_osm, code=osm_id)
                except Code.DoesNotExist:
                    osm_code = None

                def update_or_create():
                    if osm_code:
                        m = osm_code.area
                    else:
                        m = Area(
                            name=name,
                            type=Type.objects.get(code=area_code),
                            country=Country.objects.get(code='G'),
                            parent_area=parent_area,
                            generation_low=new_generation,
                            generation_high=new_generation,
                        )

                    if m.generation_high and current_generation and m.generation_high.id < current_generation.id:
                        raise Exception(
                            "Area %s found, but not in current generation %s" % (m, current_generation))
                    m.generation_high = new_generation

                    g = feat.geom.transform(4326, clone=True)

                    # In generating the data we should have
                    # excluded any "polygons" with less than four
                    # points (the final one being the same as the
                    # first), but just in case:
                    for polygon in g:
                        if polygon.num_points < 4:
                            return

                    poly = [g]

                    if options['commit']:
                        m.save()

                        if name not in kml_data.data:
                            print(json.dumps(kml_data.data, sort_keys=True, indent=4))
                            raise Exception("Will fail to find '%s' in the dictionary" % (name,))

                        for k, v in kml_data.data[name].items():
                            language_name = None
                            if k == 'name':
                                lang = 'default'
                                language_name = "OSM Default"
                            else:
                                name_match = re.search(r'^name:(.+)$', k)
                                if name_match:
                                    lang = name_match.group(1)
                                    if lang in language_code_to_name:
                                        language_name = language_code_to_name[lang]
                            if not language_name:
                                continue
                            # Otherwise, make sure that a NameType for this language exists:
                            NameType.objects.update_or_create(
                                code=lang, defaults={'description': language_name})
                            name_type = NameType.objects.get(code=lang)
                            m.names.update_or_create(type=name_type, defaults={'name': v})

                        m.codes.update_or_create(type=code_type_osm, defaults={'code': osm_id})
                        save_polygons({osm_id: (m, poly)})

                update_or_create()
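# For context: these handle_label() methods are Django LabelCommand handlers,
# so each positional argument given to the management command is passed to
# handle_label() in turn. The exact command names depend on the files these
# methods live in; the lines below are only an assumed example of how such an
# import would be driven, with --commit controlling whether anything is saved:
#
#   python manage.py <osm_import_command> /path/to/kml-directories --commit
#   python manage.py <osm_import_command> boundaries.kml --commit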