def __init__(self):
    """Create an empty word set and an auto-numbering doc -> id mapping.

    ``_doc_to_id`` is a ``defaultdict`` whose factory hands out consecutive
    integers starting at 1, so looking up an unseen document assigns it the
    next free id.
    """
    self.index_key = Levenshtein_search.populate_wordset(-1, [])

    id_counter = itertools.count(1)
    try:
        # py 2: iterators expose ``next``
        next_id = id_counter.next
    except AttributeError:
        # py 3: the method was renamed to ``__next__``
        next_id = id_counter.__next__
    self._doc_to_id = collections.defaultdict(next_id)

    self.docs = []
def test_remove_doc(self):
    """After removing 'overcoat', a lookup for it no longer returns it."""
    wordset = Levenshtein_search.populate_wordset(-1, self.excerpt1)
    Levenshtein_search.remove_string(wordset, 'overcoat')

    matches = Levenshtein_search.lookup(wordset, 'overcoat', 6)

    # Each entry is [word, <int>, <float>] exactly as returned by lookup().
    expected = [
        ['went', 6, 0.024390243902439025],
        ['cold', 6, 0.024390243902439025],
        ['Versh', 6, 0.04878048780487805],
        ['overshoes', 4, 0.04878048780487805],
        ['not', 6, 0.024390243902439025],
    ]
    assert matches == expected
def test_query_overcoat(self):
    """Look up 'overcoat' (third lookup argument 6) in both excerpts."""

    def overcoat_matches(words):
        # Build a fresh word set from ``words`` and query it.
        wordset = Levenshtein_search.populate_wordset(-1, words)
        return Levenshtein_search.lookup(wordset, 'overcoat', 6)

    # First excerpt: the expected result includes 'overcoat' itself.
    assert overcoat_matches(self.excerpt1) == [
        ['overcoat', 0, 0.023809523809523808],
        ['went', 6, 0.023809523809523808],
        ['cold', 6, 0.023809523809523808],
        ['Versh', 6, 0.047619047619047616],
        ['overshoes', 4, 0.047619047619047616],
        ['not', 6, 0.023809523809523808],
    ]

    # Second excerpt: no 'overcoat' entry is expected.
    assert overcoat_matches(self.excerpt2) == [
        ['Versh', 6, 0.044444444444444446],
        ['overshoes', 4, 0.022222222222222223],
        ['coat', 4, 0.022222222222222223],
        ['out', 6, 0.044444444444444446],
        ['here', 6, 0.022222222222222223],
    ]
def __init__(self):
    """Set up an empty word set, a self-numbering id map and a doc list.

    Unseen keys in ``_doc_to_id`` are assigned 1, 2, 3, ... in the order
    they are first looked up.
    """
    self.index_key = Levenshtein_search.populate_wordset(-1, [])

    ids = itertools.count(1)
    # py 2 iterators have ``next``; py 3 only has ``__next__``.
    advance = getattr(ids, "next", None)
    if advance is None:
        advance = ids.__next__
    self._doc_to_id = collections.defaultdict(advance)

    self.docs = []
def test_index_increment(self):
    """Two populate_wordset(-1, ...) calls must return different indexes."""
    index_a = Levenshtein_search.populate_wordset(-1, self.excerpt1)
    index_b = Levenshtein_search.populate_wordset(-1, self.excerpt2)

    # Shown in the test output to help diagnose a failure.
    print(index_a, index_b)

    assert index_a != index_b
def test_clear(self):
    """Smoke test: clearing a freshly populated word set does not raise."""
    wordset = Levenshtein_search.populate_wordset(-1, self.excerpt1)
    Levenshtein_search.clear_wordset(wordset)
def __init__(self):
    """Start with an empty word set and a document enumerator from 1.

    NOTE(review): passing -1 to ``populate_wordset`` presumably requests a
    brand-new word set -- confirm against the Levenshtein_search docs.
    """
    empty_wordset = Levenshtein_search.populate_wordset(-1, [])
    self.index_key = empty_wordset
    self._doc_to_id = Enumerator(start=1)
def unindex(self, doc):
    """Forget ``doc`` and rebuild the word set from the remaining docs.

    The existing word set is cleared and a new one is populated from the
    keys still present in ``_doc_to_id``; ``index_key`` is updated to
    refer to the new set.

    Raises whatever ``del self._doc_to_id[doc]`` raises when ``doc`` is
    not present; in that case the word set is left untouched.
    """
    del self._doc_to_id[doc]

    Levenshtein_search.clear_wordset(self.index_key)

    remaining_docs = list(self._doc_to_id)
    self.index_key = Levenshtein_search.populate_wordset(-1, remaining_docs)
import timeit

import Levenshtein_search

# Benchmark script: times one fuzzy-name query done two ways against the same
# table -- PostgreSQL's levenshtein_less_equal() versus an in-memory
# Levenshtein_search word set -- and prints the elapsed time and results of
# each.
#
# NOTE(review): psycopg2 is used below but is not imported in this part of
# the file; it is expected to be imported earlier.

conn = psycopg2.connect("host='127.0.0.1' port='5432' dbname='benchmark' user='******' password=''")
try:
    cur = conn.cursor()
    cur.execute("set schema 'public';")

    query_word = "\"philippe the original\""
    max_dist = 2

    # The statement is assembled as text so the exact SQL can be printed.
    # This is only acceptable because query_word and max_dist are constants
    # defined just above; switch to cur.execute(sql, params) before feeding
    # in any external input.
    sqlquery = (
        "select name from restaurant_nophone_training "
        "where levenshtein_less_equal(name, '{0}', {1}) <= {1};"
    ).format(query_word, max_dist)
    print(sqlquery)

    # timeit.default_timer() is a wall-clock timer.  The previous
    # time.clock() was removed in Python 3.8, and on Unix it reported process
    # CPU time, which does not include time spent waiting on the database
    # server.
    starttime = timeit.default_timer()
    cur.execute(sqlquery)
    results = cur.fetchall()
    print(str(timeit.default_timer() - starttime) + " sec")
    print(results)

    print(" ")
    print("Levenshtein_search algorithm:")
    cur.execute("select name from restaurant_nophone_training")
    # Each fetched row is a 1-tuple; keep just the name column.
    namelist = [row[0] for row in cur.fetchall()]

    idx = Levenshtein_search.populate_wordset(-1, namelist)
    try:
        starttime = timeit.default_timer()
        results = Levenshtein_search.lookup(idx, query_word, max_dist)
        print(str(timeit.default_timer() - starttime) + " sec")
        print(results)
    finally:
        # Release the word set even if the lookup fails.
        Levenshtein_search.clear_wordset(idx)
finally:
    # Always close the connection, including on errors.
    conn.close()