def dotest(terms, expected, which):
    """Run one search implementation and check which files it returns.

    The file list comes from ``filelist(rootdir)`` and the query string is
    tokenized with ``words`` before being handed to the search function.

    Args:
        terms: Query string to search for.
        expected: File names the search should return, in any order. The
            comparison uses a sorted copy, so the caller's list is left
            unmodified.
        which: Implementation selector: ``0`` uses ``linear_search``, ``1``
            uses ``create_index`` / ``index_search``, and any other value
            uses ``myhtable_create_index`` / ``myhtable_index_search``.

    Raises:
        AssertionError: If the sorted names found differ from the sorted
            expected names.
    """
    files = filelist(rootdir)
    terms = words(terms)

    if which == 0:
        docs = linear_search(files, terms)
    elif which == 1:
        index = create_index(files)
        docs = index_search(files, index, terms)
    else:
        index = myhtable_create_index(files)
        docs = myhtable_index_search(files, index, terms)

    # Order of results is not part of the contract, so compare sorted copies.
    # sorted() (rather than list.sort()) avoids mutating the caller's
    # ``expected`` list as a side effect of running the check.
    names = sorted(filenames(docs))
    expected = sorted(expected)
    assert names == expected, "found "+str(names)+" != expected "+str(expected)
def test_linear_berlitz_none():
    """Linear search for a misspelled query must return no file names."""
    corpus = filelist(rootdir)
    query = words("missspellinnng")
    hits = linear_search(corpus, query)
    assert filenames(hits) == []
def test_linear_berlitz():
    """Linear search for "hawaii travel" must return only HistoryHawaii.txt."""
    corpus = filelist(rootdir)
    query = words("hawaii travel")
    hits = linear_search(corpus, query)
    assert filenames(hits) == ['HistoryHawaii.txt']
def test_linear_berlitz_none():
    """A query made of a misspelled word yields an empty linear-search result."""
    all_files = filelist(rootdir)
    found = filenames(linear_search(all_files, words("missspellinnng")))
    no_matches = []
    assert found == no_matches
def test_linear_berlitz():
    """The query "hawaii travel" matches exactly one file under linear search."""
    all_files = filelist(rootdir)
    found = filenames(linear_search(all_files, words("hawaii travel")))
    wanted = ['HistoryHawaii.txt']
    assert found == wanted
def test_myhtable_berlitz():
    """myhtable index search for "hawaii travel" returns only HistoryHawaii.txt."""
    corpus = filelist(rootdir)
    query = words("hawaii travel")
    # Build the index first, then query it with the tokenized terms.
    table = myhtable_create_index(corpus)
    hits = myhtable_index_search(corpus, table, query)
    assert filenames(hits) == ['HistoryHawaii.txt']
def test_myhtable_berlitz_none():
    """myhtable index search for a misspelled query must return no file names."""
    corpus = filelist(rootdir)
    query = words("missspellinnng")
    # Build the index first, then query it with the tokenized terms.
    table = myhtable_create_index(corpus)
    hits = myhtable_index_search(corpus, table, query)
    assert filenames(hits) == []
from index_search import index_search, create_index from linear_search import linear_search from myhtable_search import myhtable_index_search, myhtable_create_index from words import filelist, words, results """ Usage: $ python search.py linear ~/data/slate $ python search.py index ~/data/slate $ python search.py myhtable ~/data/slate """ impl = sys.argv[1] rootdir = sys.argv[2] files = filelist(rootdir) # Uncomment the next line to test just the first 100 files instead of all files # files = files[:100] N = len(files) print N, "files" index = None while True: terms = raw_input("Search terms: ") terms = words(terms) if impl == 'linear': docs = linear_search(files, terms) elif impl == 'index': if index is None:
from index_search import index_search, create_index from linear_search import linear_search from myhtable_search import myhtable_index_search, myhtable_create_index from words import filelist, words, results """ Usage: $ python search.py linear ~/data/slate $ python search.py index ~/data/slate $ python search.py myhtable ~/data/slate """ impl = sys.argv[1] rootdir = sys.argv[2] files = filelist(rootdir) # Uncomment the next line to test just the first 100 files instead of all files # files = files[:100] N = len(files) print(N, "files") index = None while True: terms = input("Search terms: ") terms = words(terms) if impl=='linear': docs = linear_search(files, terms) elif impl == 'index': if index is None:
from index_search import index_search, create_index from linear_search import linear_search from myhtable_search import myhtable_index_search, myhtable_create_index from words import filelist, words, results """ Usage: $ python search.py linear ~/data/slate $ python search.py index ~/data/slate $ python search.py myhtable ~/data/slate """ impl = sys.argv[1] # EX: "linear" or "index" or "myhtable" rootdir = sys.argv[2] # EX: "~/data/slate" files = filelist( rootdir ) # This is a list of (.txt) file names, under each sub dir (from "1" to "55") of the "slate" dir # Uncomment the next line to test just the first 100 files instead of all files # files = files[:100] N = len(files) # N will be 4530 print(N, "files") index = None while True: terms = input("Search terms: ") # EX: "Ronald Reagan" terms = words(terms) # EX: ['reagan', 'ronald'] if impl == 'linear': # print("YES")