def match():
    """Link OSM places to SIMC entries using three name-matching passes.

    Places that already carry a SIMC link (``simc_place`` is truthy) are
    treated as pre-assigned.  The remaining places go through
    ``match_names`` three times: pass 1 without a grid, then passes 2 and 3
    with a ``Grid`` built from every place assigned so far, using the
    arguments ``(31, 31)`` and ``(43, 43)`` respectively.  The places
    matched in all passes are handed to ``refine`` together with the full
    assigned set.

    NOTE(review): ``reporting`` is not bound inside this function, so it
    has to be available at module level -- confirm against the rest of the
    file.

    Returns:
        A new set containing the pre-assigned places plus whatever
        ``refine`` returned.
    """
    already_linked = set(place for place in OSM_Place.all()
                         if place.simc_place)
    reporting.output_msg(
        "start",
        u"%i wstępnie (w danych OSM) przypisanych miejscowości"
        % (len(already_linked),))

    # The same set object is passed to every pass; match_names removes
    # entries from it as it goes.
    pending = set(place for place in OSM_Place.all()
                  if not place.simc_place)

    assigned = set(already_linked)
    matched = set()

    # Pass 1: names only, no grid.
    found, _unused_simc = match_names(1, pending)
    matched |= found
    assigned |= found

    # Passes 2 and 3: each grid is built from everything assigned so far.
    for pass_no, grid_arg in ((2, 31), (3, 43)):
        grid = Grid(assigned, grid_arg, grid_arg)
        found, _unused_simc = match_names(pass_no, pending, grid)
        matched |= found
        assigned |= found

    matched = refine(matched, assigned)
    return set(already_linked).union(matched)
def _restrict_to_cell(simc_places, cell):
    """Keep SIMC candidates whose powiat is in the cell; if more than one
    remains, additionally require the gmina to be in the cell."""
    candidates = [p for p in simc_places if p.powiat in cell.powiaty]
    if len(candidates) > 1:
        candidates = [p for p in candidates if p.gmina in cell.gminy]
    return candidates


def _conflicting_osm_places(osm_place, simc_place, grid, cell):
    """Return the other OSM places named like simc_place that are not ruled
    out by grid cell or by their gmina/powiat/wojewodztwo attributes."""
    conflicts = []
    for place in OSM_Place.by_name(simc_place.name):
        if place is osm_place:
            continue
        if cell:
            try:
                other_cell = grid.get_cell(place)
            except KeyError:
                other_cell = None
            # Only places in the very same cell can compete.
            if other_cell is not cell:
                continue
        # An administrative attribute that is set and differs rules the
        # place out; an unset attribute cannot rule anything out.
        if place.gmina and place.gmina != simc_place.gmina:
            continue
        if place.powiat and place.powiat != simc_place.powiat:
            continue
        if (place.wojewodztwo
                and place.wojewodztwo != simc_place.wojewodztwo):
            continue
        conflicts.append(place)
    return conflicts


def match_names(pass_no, places_to_match, grid=None):
    """Run one pass of matching OSM places to SIMC entries by name.

    For every place the SIMC entries with the same name are looked up,
    filtered by type and by "not linked yet", optionally narrowed using the
    grid cell of the place, and accepted only when exactly one candidate
    remains and no other OSM place competes for it.

    Args:
        pass_no: Pass number; used in progress/statistics messages and in
            the name of the ``ambigous<N>`` reporting channel.
        places_to_match: Set of OSM places still to be matched.  It is
            MUTATED: places matched in this pass and places whose name is
            not found by ``SIMC_Place.by_name`` are removed from it.
        grid: Optional grid providing ``get_cell(place)``; when given, the
            cell of a place is used to narrow down candidates.

    Returns:
        A tuple ``(osm_matched, simc_matched)`` of sets holding the OSM
        places and the SIMC entries linked during this pass.

    Side effects:
        Calls ``assign_simc`` / ``assign_osm`` on every matched pair and
        emits messages through ``Reporting``.
    """
    reporting = Reporting()
    places_count = len(places_to_match)
    if grid:
        reporting.progress_start(
            u"Dopasowywanie nazw %i miejsc, przebieg %i, z siatką %s"
            % (places_count, pass_no, grid), places_count)
    else:
        reporting.progress_start(
            u"Dopasowywanie nazw %i miejsc, przebieg %i"
            % (places_count, pass_no), places_count)

    # Channel name keeps its historical spelling; other code may key on it.
    ambiguous_channel = "ambigous%i" % (pass_no,)
    osm_matched = set()
    simc_matched = set()

    # Iterate over a snapshot: places_to_match is modified inside the loop.
    for osm_place in list(places_to_match):
        reporting.progress()
        if osm_place.name is None:
            reporting.output_msg("errors", u"%r: brak nazwy" % (osm_place,),
                                 osm_place)
            continue

        # Find matching entries in SIMC.
        try:
            matching_simc_places = SIMC_Place.by_name(osm_place.name)
        except KeyError:
            reporting.output_msg(
                "not_found",
                u"%s: nie znaleziono w TERYT" % (osm_place,), osm_place)
            # The name is not known at all, so drop the place from the
            # caller's set as well.
            places_to_match.remove(osm_place)
            continue

        simc_places = [place for place in matching_simc_places
                       if place.type == osm_place.normalized_type
                       and place.osm_place is None]
        if not simc_places:
            types_found = [place.type for place in matching_simc_places]
            reporting.output_msg(
                "bad_type",
                u"%s: nie znaleziono w TERYT"
                u" obiektu właściwego typu (%r, znaleziono: %r)" % (
                    osm_place, osm_place.type, types_found),
                osm_place)
            continue

        cell = None
        if grid:
            try:
                cell = grid.get_cell(osm_place)
            except KeyError:
                # No cell for this place: continue without cell narrowing.
                pass
        if cell:
            simc_places = _restrict_to_cell(simc_places, cell)
            if not simc_places:
                reporting.output_msg(
                    "not_found",
                    u"%s: nie znaleziono w TERYT miejsca"
                    u" pasującego do komórki %s" % (osm_place, cell),
                    osm_place)
                continue

        if len(simc_places) > 1:
            candidates_txt = u", ".join([str(p) for p in simc_places])
            # Mention the cell only when one was actually found; otherwise
            # the message would read "w komórce None".
            if cell:
                reporting.output_msg(
                    ambiguous_channel,
                    u"%s z OSM pasuje do wielu obiektów"
                    u" SIMC w komórce %s: %s" % (
                        osm_place, cell, candidates_txt),
                    osm_place)
            else:
                reporting.output_msg(
                    ambiguous_channel,
                    u"%s z OSM pasuje do wielu obiektów w SIMC: %s" % (
                        osm_place, candidates_txt),
                    osm_place)
            continue
        simc_place = simc_places[0]

        # Now check that the reverse assignment is not ambiguous.
        confl_osm_places = _conflicting_osm_places(
            osm_place, simc_place, grid, cell)
        if confl_osm_places:
            reporting.output_msg(
                ambiguous_channel,
                u"%s z SIMC pasuje do wielu obiektów w OSM: %s" % (
                    simc_place,
                    ", ".join([str(p) for p in confl_osm_places])),
                osm_place)
            continue

        if simc_place.osm_place:
            reporting.output_msg(
                ambiguous_channel,
                u"%s z SIMC ma już przypisany obiekt OSM: %s" % (
                    simc_place, simc_place.osm_place),
                osm_place)
            # Do not overwrite an existing link.  (The candidate filter
            # above already excludes linked entries; this is a safety net.)
            continue

        # Good match: link both ways and record it.
        osm_place.assign_simc(simc_place)
        simc_place.assign_osm(osm_place)
        reporting.output_msg(
            "match",
            u"%s w OSM to %s w SIMC" % (osm_place, simc_place), osm_place)
        osm_matched.add(osm_place)
        simc_matched.add(simc_place)
        places_to_match.remove(osm_place)

    reporting.progress_stop()
    reporting.output_msg(
        "stats",
        u"Przebieg %i: znaleziono w SIMC %i z %i miejscowości OSM" % (
            pass_no, len(osm_matched), places_count))
    return osm_matched, simc_matched