def test_str_to_list(): table = htable(5) htable_put(table, "parrt", [2, 99, 3942]) htable_put(table, "tombu", [6, 3, 1024, 99, 102342]) assert htable_str( table) == "{tombu:[6, 3, 1024, 99, 102342], parrt:[2, 99, 3942]}" assert htable_buckets_str(table) == """0000->
def myhtable_create_index(files):
    """
    Build an index from word to set of document indexes.

    This does the exact same thing as create_index() except that it uses
    your htable. As a number of htable buckets, use 4011.

    files is the sequence of file names to index; a file's position in
    that sequence is the document index recorded for each of its words.

    Returns a list-of-buckets hashtable representation.
    """
    NBUCKETS = 4011
    table = htable(NBUCKETS)
    for idx, fname in enumerate(files):
        # get_text() yields the file content as a string; words() turns it
        # into the list of normalized words for that document.
        for word in words(get_text(fname)):
            doc_ids = htable_get(table, word)
            if doc_ids is None:
                # First sighting of this word: start its set of documents.
                htable_put(table, word, {idx})
            else:
                # The stored set is mutated in place, so no second put is needed.
                doc_ids.add(idx)
    return table
def test_a_few(): table = htable(5) for i in range(1, 11): htable_put(table, i, i) s = htable_str(table) assert s == "{5:5, 10:10, 1:1, 6:6, 2:2, 7:7, 3:3, 8:8, 4:4, 9:9}" s = htable_buckets_str(table) assert s == """0000->5:5, 10:10
def test_str_to_set(): table = htable(5) htable_put(table, "parrt", {2, 99, 3942}) htable_put(table, "tombu", {6, 3, 1024, 99, 102342}) assert htable_str( table ) == "{tombu:set([1024, 3, 99, 6, 102342]), parrt:set([2, 99, 3942])}" assert htable_buckets_str(table) == """0000->
def test_int_to_int(): table = htable(5) for i in range(1, 11): htable_put(table, i, i) s = htable_str(table) assert s=="{5:5, 10:10, 1:1, 6:6, 2:2, 7:7, 3:3, 8:8, 4:4, 9:9}" s = htable_buckets_str(table) assert s == """0000->5:5, 10:10
def myhtable_create_index(files):
    """
    Build an htable index from word to the set of document indexes.

    files is the sequence of file names to index; a file's position in
    that sequence is the document index recorded for each of its words.
    The table is created with 4011 buckets.

    Returns the htable that was filled in.
    """
    wordBook = htable(4011)
    for fileIndex, item in enumerate(files):
        # set() collapses repeated words so each distinct word is handled
        # once per file.
        for word in set(words(get_text(item))):
            docs = htable_get(wordBook, word)
            if docs is None:
                # Store a set, not a bare int, so later files can be added
                # instead of overwriting the earlier ones.
                htable_put(wordBook, word, {fileIndex})
            else:
                docs.add(fileIndex)
    return wordBook
def test_str_to_str(): table = htable(5) htable_put(table, "a", "x") htable_put(table, "b", "y") htable_put(table, "c", "z") htable_put(table, "f", "i") htable_put(table, "g", "j") htable_put(table, "k", "k") s = htable_str(table) assert s == '{a:x, f:i, k:k, b:y, g:j, c:z}', "found " + s s = htable_buckets_str(table) assert s == """0000->
def test_str_to_str(): table = htable(5) htable_put(table, "a", "x") htable_put(table, "b", "y") htable_put(table, "c", "z") htable_put(table, "f", "i") htable_put(table, "g", "j") htable_put(table, "k", "k") s = htable_str(table) assert s=='{a:x, f:i, k:k, b:y, g:j, c:z}', "found "+s s = htable_buckets_str(table) assert s == """0000->
def test_replace_str(): table = htable(5) htable_put(table, "a", "x") htable_put(table, "b", "y") htable_put(table, "a", "z") htable_put(table, "a", "i") htable_put(table, "g", "j") htable_put(table, "g", "k") s = htable_str(table) assert s == '{a:i, b:y, g:k}', "found " + s s = htable_buckets_str(table) assert s == """0000->
def myhtable_create_index(files):
    """
    Build an index from word to set of document indexes.

    This does the exact same thing as create_index() except that it uses
    your htable. As a number of htable buckets, use 4011.

    files is the sequence of file names to index; a file's position in
    that sequence is the document index recorded for each of its words.

    Returns a list-of-buckets hashtable representation.
    """
    nbuckets = 4011
    table = htable(nbuckets)
    for value, fname in enumerate(files):
        terms = words(get_text(fname))
        for key in terms:
            doc_ids = htable_get(table, key)
            if doc_ids is None:
                # First document containing this word. The table is updated
                # in place, so it is not rebound to htable_put's result.
                htable_put(table, key, {value})
            else:
                # Extend the existing set rather than replacing it with a
                # fresh one-element set.
                doc_ids.add(value)
    return table
def myhtable_create_index(files):
    """
    Build an index from word to the set of files containing that word.

    This mirrors create_index() except that it uses your htable. As a
    number of htable buckets, use 4011.

    files is the sequence of file names to index. The values stored in
    the table are sets of those file names (not positional indexes).

    Returns a list-of-buckets hashtable representation.
    """
    d = htable(4011)
    for file in files:
        for word in words(get_text(file)):
            containing = htable_get(d, word)
            if containing is None:
                # First file seen for this word: start its set.
                htable_put(d, word, {file})
            else:
                # Grow the stored set in place so earlier files are kept.
                containing.add(file)
    return d
def myhtable_create_index(files):
    """
    Build an index from word to set of document indexes.

    Works like create_index() but stores the result in your htable, which
    is created with 4011 buckets.

    Returns a list-of-buckets hashtable representation.
    """
    # Read and tokenize every file up front; entry i holds the words of files[i].
    words_per_doc = [words(get_text(fname)) for fname in files]
    table = htable(4011)

    # Pass 1: make sure every word has an (empty) set stored under it.
    for doc_words in words_per_doc:
        for term in doc_words:
            htable_put(table, term, set())

    # Pass 2: record, for every word occurrence, the index of its document.
    for doc_id, doc_words in enumerate(words_per_doc):
        for term in doc_words:
            htable_get(table, term).add(doc_id)

    return table
def myhtable_create_index(files):
    """
    Build an index from word to set of document indexes.

    This does the exact same thing as create_index() except that it uses
    your htable. As a number of htable buckets, use 4011.

    files is the sequence of file names to index; a file's position in
    that sequence is the document index recorded for each of its words.

    Returns a list-of-buckets hashtable representation, or None when
    files is empty.
    """
    if not files:
        return None

    table = htable(4011)
    for i, fname in enumerate(files):
        key_words = words(get_text(fname))
        for word in key_words:
            doc_ids = htable_get(table, word)
            if doc_ids is None:
                htable_put(table, word, {i})
            else:
                # Merge explicitly instead of depending on htable_put to
                # combine a new set with the one already stored.
                doc_ids.add(i)
    return table
def test_empty(): table = htable(5) assert htable_str(table) == "{}" assert htable_buckets_str(table) == """0000->
def test_str_to_set(): table = htable(5) htable_put(table, "parrt", {2, 99, 3942}) htable_put(table, "tombu", {6, 3, 1024, 99, 102342}) assert htable_str(table) == "{tombu:set([1024, 3, 99, 6, 102342]), parrt:set([2, 99, 3942])}" assert htable_buckets_str(table) == """0000->
def test_str_to_list(): table = htable(5) htable_put(table, "parrt", [2, 99, 3942]) htable_put(table, "tombu", [6, 3, 1024, 99, 102342]) assert htable_str(table) == "{tombu:[6, 3, 1024, 99, 102342], parrt:[2, 99, 3942]}" assert htable_buckets_str(table) == """0000->
def test_singleon(): table = htable(5) htable_put(table, "parrt", set([99])) assert htable_str(table) == "{parrt:set([99])}" assert htable_buckets_str(table) == """0000->
def test_singleton(): table = htable(5) htable_put(table, "parrt", set([99])) assert htable_str(table) == "{parrt:{99}}" assert htable_buckets_str(table) == """0000->
def test_single(): table = htable(5) htable_put(table, "parrt", 99) assert htable_str(table) == "{parrt:99}" assert htable_buckets_str(table) == """0000->