def test_import_stopwords(self):
    """Check stop-word import against each hash table implementation.

    For every table type, the key "unless" is expected to map to itself
    after the import, and ``get`` on the key "Parth" is expected to raise
    ``KeyError``.
    """
    table_types = (HashTableSepchain, HashTableLinear, HashTableQuadratic)
    for make_table in table_types:
        table = import_stopwords("stop_words.txt", make_table())
        self.assertEqual(table["unless"], "unless")
        self.assertRaises(KeyError, table.get, "Parth")
def main():
    """Run the interactive search prompt.

    The directory to index is read from the first command line argument.
    Entering "q" leaves the loop; entering "s:{query}" passes the text after
    the prefix to ``SearchEngine.search``. Any other input is ignored and the
    prompt is shown again.

    NOTE(review): the value returned by ``search`` is not used here, so any
    result display presumably happens inside ``search`` itself -- confirm.
    """
    # Read the directory first so a missing argument fails before any loading.
    directory = sys.argv[1]
    stop_words = import_stopwords("stop_words.txt", HashTable())
    engine = SearchEngine(directory, stop_words)

    print("Enter 'q' to exit program")
    print("Enter 's:{query}' to search")
    while True:
        command = input("Enter query: ")
        if command == "q":
            return
        if command.startswith("s:"):
            engine.search(command[2:])
def test_SE(self):
    """Exercise a SearchEngine built over the ``docs`` directory.

    Covers the state set up by the constructor and the ``search``, ``rank``,
    ``get_scores``, ``get_wf``, ``parse_words`` and ``read_file`` methods.
    """
    test_doc = "docs\\test.txt"
    data_structure_doc = "docs\\data_structure.txt"

    engine = SearchEngine(
        "docs", import_stopwords("stop_words.txt", HashTableLinear()))

    # Constructor state: number of documents and the stop-word table.
    self.assertEqual(engine.doc_length.num_items, 4)
    self.assertEqual(
        engine.stopwords,
        import_stopwords("stop_words.txt", HashTableLinear()))

    # search(): expected document and score at specific result positions.
    self.assertEqual(
        engine.search("Computer Science")[0], Pair(test_doc, 1.0))
    self.assertEqual(engine.search("ADT")[0][0], data_structure_doc)
    self.assertEqual(round(engine.search("ADT")[0][1], 2), 0.01)
    self.assertEqual(engine.search("Hash Table")[1][0], data_structure_doc)
    self.assertEqual(round(engine.search("Hash Table")[1][1], 2), 0.01)

    # rank(): pairs are expected back in descending order of their number.
    shuffled = [
        Pair(label, number)
        for label, number in (("P", 5), ("A", 2), ("R", 1), ("T", 4), ("H", 3))
    ]
    self.assertEqual(
        engine.rank(shuffled),
        [
            Pair(label, number)
            for label, number in (
                ("P", 5), ("T", 4), ("H", 3), ("A", 2), ("R", 1))
        ])

    # get_scores(): a matching query scores the test document; the second
    # query is expected to produce no scores at all.
    self.assertEqual(
        engine.get_scores(["computer", "science"])[0], Pair(test_doc, 1.0))
    self.assertEqual(engine.get_scores(["every", "nothing", "few"]), [])

    # get_wf(): expected 2.79 (two decimals) for 6 and 0 for a negative input.
    self.assertEqual(round(engine.get_wf(6), 2), 2.79)
    self.assertEqual(engine.get_wf(-6), 0)

    # parse_words(): expected lower-cased words with some words dropped.
    raw_lines = [
        "Automated information retrieval systems of ",
        "Information retrieval and afterwards say\n"
    ]
    expected_words = [
        'automated', 'information', 'retrieval', 'systems', 'information',
        'retrieval'
    ]
    self.assertEqual(engine.parse_words(raw_lines), expected_words)
    self.assertEqual(
        engine.parse_words(["and afterwards say\n", "much without the"]), [])

    # read_file(): expected to return the file's lines.
    self.assertEqual(engine.read_file(test_doc), ["computer science\n"])
def test_linear3(self):
    """Import the stop words into a HashTableLinear and check the result.

    Verifies the number of stored entries, that the load factor lies in
    the range [0.3, 0.4], and membership for a handful of words.
    """
    ht = HashTableLinear()
    # The file name has to be passed as a string.
    stop_words = import_stopwords("stop_words.txt", ht)
    self.assertEqual(stop_words.size(), 305)
    self.assertTrue(0.3 <= stop_words.load_factor() <= 0.4)
    self.assertNotIn("collision", stop_words)
    self.assertIn("very", stop_words)
    self.assertNotIn("linear", stop_words)
    self.assertIn("a", stop_words)
def test_sepchain5(self):
    """Import the stop words into a HashTableSepchain and check the result.

    Verifies the number of stored entries, that the load factor does not
    exceed 1.5, and membership for a handful of words.
    """
    table = import_stopwords(FILE, HashTableSepchain())
    self.assertEqual(table.size(), 305)
    self.assertLessEqual(table.load_factor(), 1.5)

    # (word, whether it is expected to be in the table), checked in order.
    membership = (
        ("collision", False),
        ("very", True),
        ("linear", False),
        ("a", True),
    )
    for word, expected in membership:
        self.assertEqual(word in table, expected)
def entry_point(dir_name):
    """Run an interactive search loop.

    Prompts repeatedly for a query, prints whatever ``SearchEngine.search``
    returns for it, and stops as soon as the user enters ``q``.

    Args:
        dir_name: directory handed to ``SearchEngine``.
    """
    engine = SearchEngine(
        dir_name, import_stopwords('stop_words.txt', HashTableLinear()))
    # iter() with a sentinel keeps prompting until the input equals 'q'.
    for query in iter(lambda: input('Input Search: '), 'q'):
        print(engine.search(query))
def build_stopwords(filename):
    """Build a hash table of stop words from a text file.

    Args:
        filename (str): path of the stop words file

    Returns:
        Whatever ``import_stopwords`` returns when given ``filename`` and a
        new ``HashTableLinear``.
    """
    return import_stopwords(filename, HashTableLinear())
def main():
    """Run the interactive search prompt.

    The directory to search is taken from the first command line argument.
    The user is then prompted repeatedly:

    * input that begins with ``s:`` is treated as a query; the text after
      the prefix is lower-cased and stripped, and the result of
      ``SearchEngine.search`` for it is printed;
    * ``q`` exits, and the function returns None;
    * any other input is ignored and the prompt is shown again.
    """
    search_engine = SearchEngine(
        sys.argv[1], import_stopwords("stop_words.txt", HashTableLinear()))
    prompt = (
        "Type 's:' and what you would like to search for or type 'q' to exit: "
    )
    while True:
        user_input = input(prompt)
        if user_input == "q":
            return
        # Only a leading "s:" marks a search. A substring test would also
        # accept input that merely contains "s:" and then chop off its first
        # two characters.
        if user_input.startswith("s:"):
            query = user_input[2:].lower().strip()
            print(search_engine.search(query))
def test_whole_functionality(self):
    """Exercise HashTableLinear: import, equality, repr, put, get, remove."""
    stop_table = import_stopwords('stop_words.txt', HashTableLinear())
    self.assertRaises(KeyError, stop_table.get, 'BubbleGum')
    self.assertIn('to', stop_table)

    # Two tables holding the same single entry are expected to compare equal.
    table_a = HashTableLinear()
    table_a.put('three', 'three')
    table_b = HashTableLinear()
    table_b.put('three', 'three')
    self.assertEqual(table_a, table_b)
    self.assertNotEqual(stop_table, table_a)
    self.assertNotEqual(stop_table, 5)

    # Expected repr of the one-entry table: one line per slot.
    expected = (
        "Hash_val = 0: None\n"
        "Hash_val = 1: None\n"
        "Hash_val = 2: None\n"
        "Hash_val = 3: None\n"
        "Hash_val = 4: ('three', 'three')\n"
        "Hash_val = 5: None\n"
        "Hash_val = 6: None\n"
        "Hash_val = 7: None\n"
        "Hash_val = 8: None\n"
        "Hash_val = 9: None\n"
        "Hash_val = 10: None\n"
    )
    self.assertEqual(expected, repr(table_a))

    # Item assignment and lookup.
    table_a['four'] = 'four'
    self.assertEqual(table_a['four'], 'four')
    table_a['five'] = 'five'
    # The test expects the imported stop word 'from' to be stored with 0.
    self.assertEqual(0, stop_table.get('from'))

    # contains() and remove().
    self.assertFalse(table_a.contains('p'))
    self.assertTrue(table_a.contains('five'))
    table_a.remove('five')
    self.assertFalse(table_a.contains('five'))
    self.assertRaises(KeyError, table_a.remove, 'p')

    # The second one-entry table was never touched again.
    self.assertEqual(1, table_b.size())
    self.assertEqual(0, table_b.collisions())
def main():
    """Interactive entry point for searching a directory.

    Asks for a directory name once, then repeatedly asks for a command:

    * ``q`` exits;
    * ``s`` asks for a query, runs it through ``SearchEngine.parse_words``
      and passes the parsed result to ``SearchEngine.search``;
    * anything else prints an error message and asks again.

    NOTE(review): the value returned by ``search`` is not used here, so any
    result display presumably happens inside ``search`` itself -- confirm.
    """
    directory = input("please enter a directory name\n")
    # Built on the first search and reused afterwards, so the directory and
    # the stop-word file are not reloaded for every single query.
    search = None
    while True:
        command = input("press q to exit\n"
                        "press s to search\n"
                        "What would you like to do?\n")
        if command == "q":
            break
        if command != "s":
            print("that is not a valid command\n")
            continue
        if search is None:
            search = SearchEngine(
                directory,
                import_stopwords("stop_words.txt", HashTableLinear()))
        new_query = [input("what would you like to search?\n")]
        query_string = search.parse_words(new_query)
        search.search(query_string)
def main():
    """Interactive entry point for the search engine.

    Prints the usage instructions, asks for the directory to search, and
    then reads commands until the user quits. Each command is lower-cased
    before it is interpreted: ``s:<query>`` prints one line per search
    result (its first two fields separated by ": "), ``q:`` quits, and
    anything else is ignored.
    """
    instructions = (
        "================== INSTRUCTIONS ==================",
        "1. Enter the name of a directory to search in.",
        "2. Enter a search query, or quit. To search,",
        " prepend your search query with 's:'. For example,",
        " 's:Computer Science'. Type 'q:' to quit.",
    )
    for line in instructions:
        print(line)

    stopwords = import_stopwords("stop_words.txt", HashTable())
    engine = SearchEngine(input("\nEnter a search directory: "), stopwords)

    while True:
        command = input("\nEnter a command: ").lower()
        if command == 'q:':
            break
        if command.startswith('s:'):
            for hit in engine.search(command[2:]):
                print(f"{hit[0]}: {hit[1]}")
def setUp(self):
    """Create the SearchEngine fixture over the ``docs`` directory."""
    self.dir = "docs"
    self.se = SearchEngine(
        self.dir, import_stopwords("stop_words.txt", HashTable()))