def update_global_replacement_list(self, key, geoid, prefix):
    """
    Record a global replacement for a place name.

    A replacement entry is built from ``geoid`` and ``prefix`` and stored in
    ``self.global_replace`` under the normalized form of ``key`` (commas are
    kept during normalization).

    :param key: place name the replacement applies to
    :param geoid: geoid passed to ReplacementDictionary.build_replacement_entry
    :param prefix: prefix passed to ReplacementDictionary.build_replacement_entry
    """
    replacement_entry = ReplacementDictionary.build_replacement_entry(geoid, prefix)
    normalized_key = Normalize.normalize(text=key, remove_commas=False)
    self.global_replace.set(normalized_key, replacement_entry)

    # Only write the dictionary out when the shared counter is at 1, 11, 21, ...
    if self.update_counter % 10 != 1:
        return
    self.global_replace.write()
def process_place_entries(self) -> None:
    """
    Handle PLACE entries in the user's file: replace, skip, or have the user correct each one.

    Entries are pulled one at a time from ``self.ancestry_file_handler`` and the loop keeps
    going for as long as an entry can be resolved without user interaction:

    * a global replacement exists for the normalized entry -> write the updated place
      (STRONG_MATCH), write nothing (DELETE), or stop for review if the stored geoid
      no longer resolves;
    * the entry is in the skip list -> write it out as-is;
    * otherwise look it up with ``self.geodata.find_matches``; a strong match is written
      out and remembered as a global replacement, anything else either goes to the user
      for review or, when shutdown has been requested, is written out as-is.

    When the loop breaks, ``self.display_result(self.place)`` is called so the user can
    review the current place.
    """
    self.w.original_entry.text = ""

    if self.w.prog.shutdown_requested:
        self.periodic_update("Shutting down...")
    else:
        self.periodic_update("Scanning")
    self.clear_result_list(self.place)

    while True:
        # Keep reading place entries until we need User review or reach End Of File
        self.update_counter += 1  # Counter is used to periodically update
        # Update statistics
        self.update_statistics()

        # Find the next PLACE entry in file
        # Process it and keep looping until we need user review
        self.place.clear()
        town_entry, eof, rec_id = self.ancestry_file_handler.get_next_place( )
        # updated_entry keeps the text as read from the file; town_entry is normalized just below
        self.place.updated_entry = town_entry
        self.place.id = rec_id
        town_entry = Normalize.normalize(text=town_entry, remove_commas=False)

        if eof:
            # NOTE(review): execution falls through to the lookup below if this call returns;
            # presumably end_of_file_shutdown() terminates the application - confirm.
            self.end_of_file_shutdown()

        # See if we already have a fix (Global Replace) or Skip (ignore).
        # Otherwise see if we can find it or have user handle it
        replacement_geoid = self.get_replacement(self.global_replace, town_entry, self.place)

        if replacement_geoid is not None:
            # There is already a global change that we can apply to this entry.
            # The counter is bumped before the result type is examined, so DELETE and
            # the error case below are counted as matched too.
            self.matched_count += 1

            if self.place.result_type == GeoUtil.Result.STRONG_MATCH:
                # REPLACE - Output the updated place to ancestry file
                self.write_updated_place(self.place, town_entry)

                # Display status to user
                if self.w.prog.shutdown_requested:
                    self.periodic_update("Creating Import...")
                else:
                    self.periodic_update("Applying change")
            elif self.place.result_type == GeoUtil.Result.DELETE:
                # DELETE - Don't write out this place
                continue
            else:
                # ERROR - We previously found an update, but the GEOID for replacement can no longer be found
                self.logger.warning(
                    f'***ERROR looking up GEOID=[{replacement_geoid}] for [{town_entry}] '
                )
                self.place.event_year = int(
                    self.ancestry_file_handler.event_year
                )  # Set place date to event date (geo names change over time)
                self.w.original_entry.text = f'** DATABASE ERROR FOR GEOID=[{replacement_geoid}] for [{town_entry}]'
                self.w.user_entry.text = f'{town_entry}'
                # Run a fresh lookup, then leave the loop so the result is shown to the user
                self.geodata.find_matches(town_entry, self.place, self.w.prog.shutdown_requested)
                break
            # Replacement was written out - move on to the next entry
            continue
        elif self.skiplist.get(town_entry) is not None:
            # SKIP - User marked place as SKIP - Write out as-is and go to next error
            self.skip_count += 1
            self.periodic_update("Skipping")
            self.ancestry_file_handler.write_asis(town_entry)
            continue
        else:
            # FOUND a PLACE entry that we don't already have a global replace or skip for
            # See if it is in the place database
            self.place.event_year = int(
                self.ancestry_file_handler.event_year
            )  # Set place date to event date (geo names change over time)
            self.geodata.find_matches(town_entry, self.place, self.w.prog.shutdown_requested)

            if self.place.result_type in GeoUtil.successful_match:
                # Lookup succeeded; only STRONG_MATCH is accepted without user review
                if self.place.result_type == GeoUtil.Result.STRONG_MATCH:
                    # FOUND STRONG MATCH - no user verification needed
                    self.matched_count += 1

                    # Write out line without user verification
                    if self.w.prog.shutdown_requested:
                        self.periodic_update("Creating Import...")
                    else:
                        self.periodic_update("Scanning")

                    # Add to global replace list
                    self.update_global_replacement_list(
                        key=town_entry, geoid=self.place.geoid, prefix=self.place.prefix)
                    self.write_updated_place(self.place, town_entry)
                    self.logger.debug(
                        f'Found Strong Match for {town_entry} Setting DICT'
                    )
                    continue
                else:
                    # WEAK MATCH OR MULTIPLE MATCHES
                    if self.w.prog.shutdown_requested:
                        # User requested shutdown - so no user interaction. Write this item out as-is
                        self.review_count += 1
                        self.periodic_update("Creating Import...")
                        self.w.original_entry.text = " "
                        self.ancestry_file_handler.write_asis(town_entry)
                        continue
                    else:
                        # USER REVIEW - Have user review the match
                        self.logger.debug(
                            f'user review for {town_entry} res= [{self.place.result_type}] '
                        )
                        self.w.status.configure(style="Good.TLabel")
                        self.w.original_entry.text = self.place.original_entry  # Display place
                        # The editable field shows the place's updated_entry here
                        self.w.user_entry.text = self.place.updated_entry  # Display place
                        # Break out of loop and have user review the match
                        break
            else:
                # NO MATCH FOUND
                if self.w.prog.shutdown_requested:
                    # User requested shutdown. Write this item out as-is
                    self.review_count += 1
                    self.periodic_update("Creating Import...")
                    self.w.original_entry.text = " "
                    self.ancestry_file_handler.write_asis(town_entry)
                    continue
                else:
                    # USER REVIEW - Have user review entry
                    self.w.status.configure(style="Good.TLabel")
                    self.w.original_entry.text = self.place.original_entry  # Display place
                    # Unlike the weak-match case, the editable field shows original_entry
                    self.w.user_entry.text = self.place.original_entry  # Display place
                    # Break out of loop and have user review the item
                    break

    # Have user review the result
    self.display_result(self.place)
def load_data_files(self) -> bool:
    """
    Load the cached dictionaries GeoFinder needs and open the geo database.

    Reads the skip list, the global replacement list, and the feature,
    country and language lists from ``self.cache_dir``, creates
    ``self.geodata`` from them, and opens it.

    :return: the result of ``self.geodata.open()``; a truthy value is
        treated as an error and is also reported via TKHelper.fatal_error
    """
    # Skip list and global replacement list
    self.skiplist = CachedDictionary.CachedDictionary(self.cache_dir, "skiplist.pkl")
    self.skiplist.read()

    self.global_replace = CachedDictionary.CachedDictionary(self.cache_dir, "global_replace.pkl")
    self.global_replace.read()

    # Re-key every global replacement under its normalized key.
    # Iterate over a snapshot of the keys because the dictionary is modified in the loop.
    for old_key in list(self.global_replace.dict):
        entry = self.global_replace.dict.pop(old_key)
        normalized_key = Normalize.normalize(text=old_key, remove_commas=False)
        self.global_replace.dict[normalized_key] = entry

    # Geoname feature codes to include
    self.feature_code_list_cd = CachedDictionary.CachedDictionary(self.cache_dir, "feature_list.pkl")
    self.feature_code_list_cd.read()
    feature_code_list_dct: Dict[str, str] = self.feature_code_list_cd.dict

    if len(feature_code_list_dct) < 3:
        # Fewer than three features: reset to the default feature set and save it
        self.logger.warning('Feature list is empty.')
        feature_code_list_dct.clear()
        feature_code_list_dct.update(dict.fromkeys(UtilFeatureFrame.default, ''))
        self.feature_code_list_cd.write()

    # Countries (ISO2) to include
    self.supported_countries_cd = CachedDictionary.CachedDictionary(self.cache_dir, "country_list.pkl")
    self.supported_countries_cd.read()
    supported_countries_dct: Dict[str, str] = self.supported_countries_cd.dict

    # Languages (ISO2) to include
    self.languages_list_cd = CachedDictionary.CachedDictionary(self.cache_dir, "languages_list.pkl")
    self.languages_list_cd.read()
    languages_list_dct: Dict[str, str] = self.languages_list_cd.dict

    # Set up the geo data object with everything loaded above
    self.geodata = Geodata(
        directory_name=self.directory,
        progress_bar=self.w.prog,
        enable_spell_checker=self.enable_spell_checker,
        show_message=True,
        exit_on_error=True,
        languages_list_dct=languages_list_dct,
        feature_code_list_dct=feature_code_list_dct,
        supported_countries_dct=supported_countries_dct,
    )

    # If the list of supported countries is unusually short, display note to user
    country_count = self.display_country_note()
    self.logger.info('{} countries will be loaded'.format(country_count))

    # Open the Geoname gazetteer DB - city names, lat/long, etc.
    error = self.geodata.open()
    if error:
        TKHelper.fatal_error(MISSING_FILES)

    self.w.root.update()
    self.w.prog.update_progress(100, " ")
    return error