def _find_suggestions(self):
    """Compute field suggestions for ``self.field`` and store them.

    Candidates are taken from ``self.ds.derived_field_list`` and collected
    in two passes:

    1. Fields whose name equals the requested name (case-insensitively).
       When a field type was requested, only candidates with a *different*
       type are kept. These are ranked with distance 0.
    2. Only when a field type was requested: fields whose lowercased
       ``str()`` form is within the edit-distance cutoff of the lowercased
       ``str()`` form of ``self.field``.

    The result is written to ``self.suggestions`` as a list of
    ``(ftype, fname)`` tuples sorted by increasing distance; nothing is
    returned.
    """
    from yt.funcs import levenshtein_distance

    requested = self.field
    dataset = self.ds

    # Split the request into (type, name); the type is None when the field
    # is not a tuple or when its second entry is None.
    if isinstance(requested, tuple):
        if requested[1] is None:
            wanted_type, wanted_name = None, requested[0]
        else:
            wanted_type, wanted_name = requested
    else:
        wanted_type, wanted_name = None, requested

    # Edit-distance cutoff. The value is arbitrary, but chosen so that
    #  - small typos still produce meaningful suggestions
    #    (e.g. `densty` -> `density`)
    #  - unrelated names are not suggested (e.g. `pressure` -> `density`
    #    has a distance of 6, which we definitely do not want)
    # NOTE(review): the comparison below is strict, so only candidates that
    # are fewer than 3 edits away are kept; the previous comment described
    # this as "at most 3 edits" -- confirm which one is intended.
    edit_cutoff = 3

    # Maps (ftype, fname) -> distance; insertion order is preserved and is
    # what breaks ties in the final sort.
    ranked = {}

    # Pass 1: same name (ignoring case), with an alternative field type.
    for cand_type, cand_name in dataset.derived_field_list:
        if cand_name.lower() != wanted_name.lower():
            continue
        if wanted_type is not None and cand_type.lower() == wanted_type.lower():
            continue
        ranked[cand_type, cand_name] = 0

    # Pass 2: close matches on the full field representation.
    if wanted_type is not None:
        lowered = {key: str(key).lower() for key in dataset.derived_field_list}
        needle = str(requested).lower()

        for (cand_type, cand_name), haystack in lowered.items():
            edits = levenshtein_distance(needle, haystack, max_dist=edit_cutoff)
            # Never overwrite an exact-name match recorded in pass 1.
            if edits < edit_cutoff and (cand_type, cand_name) not in ranked:
                ranked[cand_type, cand_name] = edits

    # Most likely suggestions first (stable sort on the recorded distance).
    self.suggestions = [
        (cand_type, cand_name)
        for cand_type, cand_name in sorted(ranked, key=ranked.get)
    ]
def test_levenshtein():
    """Check ``levenshtein_distance`` against hand-computed edit distances."""
    # Each row is (first string, second string, expected distance) and is
    # evaluated without a ``max_dist`` argument.
    uncapped_cases = (
        # Identical strings
        ("abcdef", "abcdef", 0),
        # Deletions / additions
        ("abcdef", "abcde", 1),
        ("abcdef", "abcd", 2),
        ("abcdef", "abc", 3),
        ("abcdf", "abcdef", 1),
        ("cdef", "abcdef", 2),
        ("bde", "abcdef", 3),
        # Substitutions
        ("abcd", "abc_", 1),
        ("abcd", "ab__", 2),
        ("abcd", "a___", 3),
        ("abcd", "____", 4),
        # Additions + substitutions
        ("abcd", "abc_z", 2),
        ("abcd", "ab__zz", 4),
        ("abcd", "a___zzz", 6),
        ("abcd", "____zzzz", 8),
    )
    for first, second, expected in uncapped_cases:
        assert_equal(levenshtein_distance(first, second), expected)

    # Each row is (first string, second string, max_dist, expected distance).
    # In every row the expected value equals
    # min(true distance, max_dist + 1).
    capped_cases = (
        ("abcd", "", 0, 1),
        ("abcd", "", 3, 4),
        ("abcd", "", 10, 4),
        ("abcd", "", 1, 2),
        ("abcd", "a", 2, 3),
        ("abcd", "ad", 2, 2),
        ("abcd", "abd", 2, 1),
        ("abcd", "abcd", 2, 0),
    )
    for first, second, cap, expected in capped_cases:
        assert_equal(levenshtein_distance(first, second, max_dist=cap), expected)